diff --git a/src/app/api/openrouter/[...path]/route.ts b/src/app/api/openrouter/[...path]/route.ts index c7eb42c116..d5986ec5bc 100644 --- a/src/app/api/openrouter/[...path]/route.ts +++ b/src/app/api/openrouter/[...path]/route.ts @@ -39,7 +39,6 @@ import { } from '@/lib/llm-proxy-helpers'; import { getBalanceAndOrgSettings } from '@/lib/organizations/organization-usage'; import { ENABLE_TOOL_REPAIR, repairTools } from '@/lib/tool-calling'; -import { isRateLimitedToDeathFree } from '@/lib/providers/openrouter'; import { isFreePromptTrainingAllowed } from '@/lib/providers/openrouter/types'; import { redactedModelResponse } from '@/lib/redactedModelResponse'; import { @@ -241,10 +240,6 @@ export async function POST(request: NextRequest): Promise73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", + "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", "endpoint": { "adapter_name": "AnthropicMessageAdapter", "can_abort": true, @@ -7780,8 +7755,7 @@ }, "features": { "supported_parameters": {}, - "supports_input_audio": false, - "supports_native_web_search": true, + "supports_file_urls": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -7790,23 +7764,23 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "41d2915a-92e6-4993-b537-210b4e10cba8", + "has_completions": false, + "id": "1347c8dc-12f5-47df-8355-3ec7e80a8c67", "is_byok": false, "is_deranked": true, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 125, + "limit_rpm": 40, "limit_rpm_cf": null, - "max_completion_tokens": 64000, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "anthropic", "context_length": 200000, - "created_at": "2025-10-15T17:00:38+00:00", + "created_at": "2025-02-24T18:35:10.00008+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -7814,7 +7788,7 @@ }, "default_stops": [], "default_system": null, - "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line; enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", + "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", "features": { "chat_template_config": {}, "reasoning_config": { @@ -7828,37 +7802,33 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": null, - "name": "Anthropic: Claude Haiku 4.5", + "model_version_group_id": "30636d20-cda3-4a59-aa0c-1a5b6efba072", + "name": "Anthropic: Claude 3.7 Sonnet", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.5-haiku-20251001", + "permaslug": "anthropic/claude-3-7-sonnet-20250219", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude Haiku 4.5", - "slug": "anthropic/claude-haiku-4.5", - "updated_at": "2025-12-05T21:53:18.541396+00:00", + "short_name": "Claude 3.7 Sonnet", + "slug": "anthropic/claude-3.7-sonnet", + "updated_at": "2025-12-05T21:54:07.586262+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-4.5-haiku-20251001", - "model_variant_slug": "anthropic/claude-haiku-4.5", + "model_variant_permaslug": "anthropic/claude-3-7-sonnet-20250219", + "model_variant_slug": "anthropic/claude-3.7-sonnet", "moderation_required": true, - "name": "Anthropic | anthropic/claude-4.5-haiku-20251001", + "name": "Anthropic | anthropic/claude-3-7-sonnet-20250219", "pricing": { - "completion": "0.000005", + "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000001", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000003", + "prompt": "0.000003", + "web_search": "0.01" }, "provider_display_name": "Anthropic", "provider_info": { @@ -7890,7 +7860,7 @@ "slug": "anthropic", "statusPageUrl": "https://status.anthropic.com/" }, - "provider_model_id": "claude-haiku-4-5-20251001", + "provider_model_id": "claude-3-7-sonnet-20250219", "provider_name": "Anthropic", "provider_region": null, "provider_slug": "anthropic", @@ -7903,8 +7873,7 @@ "reasoning", "include_reasoning", "tools", - "tool_choice", - "top_k" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, @@ -7925,27 +7894,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": null, - "name": "Anthropic: Claude Haiku 4.5", + "model_version_group_id": "30636d20-cda3-4a59-aa0c-1a5b6efba072", + "name": "Anthropic: Claude 3.7 Sonnet", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.5-haiku-20251001", + "permaslug": "anthropic/claude-3-7-sonnet-20250219", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude Haiku 4.5", - "slug": "anthropic/claude-haiku-4.5", - "updated_at": "2025-12-05T21:53:18.541396+00:00", + "short_name": "Claude 3.7 Sonnet", + "slug": "anthropic/claude-3.7-sonnet", + "updated_at": "2025-12-05T21:54:07.586262+00:00", "warning_message": null }, { "author": "anthropic", "context_length": 200000, - "created_at": "2025-05-22T16:27:25.029961+00:00", + "created_at": "2025-10-15T17:00:38+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -7953,7 +7922,7 @@ }, "default_stops": [], "default_system": null, - "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", + "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line; enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", "endpoint": { "adapter_name": "AnthropicMessageAdapter", "can_abort": true, @@ -7968,7 +7937,7 @@ }, "features": { "supported_parameters": {}, - "supports_file_urls": true, + "supports_input_audio": false, "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, @@ -7978,23 +7947,23 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "9ea0cd22-4494-4a94-9199-c83c992bdbe1", + "has_completions": true, + "id": "41d2915a-92e6-4993-b537-210b4e10cba8", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 125, "limit_rpm_cf": null, - "max_completion_tokens": 32000, + "max_completion_tokens": 64000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "anthropic", "context_length": 200000, - "created_at": "2025-05-22T16:27:25.029961+00:00", + "created_at": "2025-10-15T17:00:38+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -8002,8 +7971,9 @@ }, "default_stops": [], "default_system": null, - "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", + "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line; enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -8015,36 +7985,32 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Opus 4", + "name": "Anthropic: Claude Haiku 4.5", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4-opus-20250522", + "permaslug": "anthropic/claude-4.5-haiku-20251001", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude Opus 4", - "slug": "anthropic/claude-opus-4", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Claude Haiku 4.5", + "slug": "anthropic/claude-haiku-4.5", + "updated_at": "2025-12-05T21:53:18.541396+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-4-opus-20250522", - "model_variant_slug": "anthropic/claude-opus-4", + "model_variant_permaslug": "anthropic/claude-4.5-haiku-20251001", + "model_variant_slug": "anthropic/claude-haiku-4.5", "moderation_required": true, - "name": "Anthropic | anthropic/claude-4-opus-20250522", + "name": "Anthropic | anthropic/claude-4.5-haiku-20251001", "pricing": { - "completion": "0.000075", + "completion": "0.000005", "discount": 0, - "image": "0.024", - "image_output": "0", - "input_cache_read": "0.0000015", - "internal_reasoning": "0", - "prompt": "0.000015", - "request": "0", + "input_cache_read": "0.0000001", + "prompt": "0.000001", "web_search": "0.01" }, "provider_display_name": "Anthropic", @@ -8077,7 +8043,7 @@ "slug": "anthropic", "statusPageUrl": "https://status.anthropic.com/" }, - "provider_model_id": "claude-opus-4-20250514", + "provider_model_id": "claude-haiku-4-5-20251001", "provider_name": "Anthropic", "provider_region": null, "provider_slug": "anthropic", @@ -8090,7 +8056,10 @@ "reasoning", "include_reasoning", "tools", - "tool_choice" + "tool_choice", + "top_k", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, @@ -8099,6 +8068,7 @@ "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -8110,27 +8080,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Opus 4", + "name": "Anthropic: Claude Haiku 4.5", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4-opus-20250522", + "permaslug": "anthropic/claude-4.5-haiku-20251001", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude Opus 4", - "slug": "anthropic/claude-opus-4", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Claude Haiku 4.5", + "slug": "anthropic/claude-haiku-4.5", + "updated_at": "2025-12-05T21:53:18.541396+00:00", "warning_message": null }, { "author": "anthropic", "context_length": 200000, - "created_at": "2025-08-05T16:33:11.634562+00:00", + "created_at": "2025-05-22T16:27:25.029961+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -8138,7 +8108,7 @@ }, "default_stops": [], "default_system": null, - "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", + "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", "endpoint": { "adapter_name": "AnthropicMessageAdapter", "can_abort": true, @@ -8154,7 +8124,6 @@ "features": { "supported_parameters": {}, "supports_file_urls": true, - "supports_input_audio": false, "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, @@ -8165,7 +8134,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "ca4e491c-208c-4bd0-b808-35e0ad56bc52", + "id": "9ea0cd22-4494-4a94-9199-c83c992bdbe1", "is_byok": false, "is_deranked": true, "is_disabled": false, @@ -8180,7 +8149,7 @@ "model": { "author": "anthropic", "context_length": 200000, - "created_at": "2025-08-05T16:33:11.634562+00:00", + "created_at": "2025-05-22T16:27:25.029961+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -8188,7 +8157,7 @@ }, "default_stops": [], "default_system": null, - "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", + "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", "features": { "reasoning_config": { "end_token": null, @@ -8204,33 +8173,211 @@ "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Opus 4.1", + "name": "Anthropic: Claude Opus 4", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.1-opus-20250805", + "permaslug": "anthropic/claude-4-opus-20250522", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude Opus 4.1", - "slug": "anthropic/claude-opus-4.1", + "short_name": "Claude Opus 4", + "slug": "anthropic/claude-opus-4", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-4.1-opus-20250805", - "model_variant_slug": "anthropic/claude-opus-4.1", + "model_variant_permaslug": "anthropic/claude-4-opus-20250522", + "model_variant_slug": "anthropic/claude-opus-4", "moderation_required": true, - "name": "Anthropic | anthropic/claude-4.1-opus-20250805", + "name": "Anthropic | anthropic/claude-4-opus-20250522", + "pricing": { + "completion": "0.000075", + "discount": 0, + "input_cache_read": "0.0000015", + "prompt": "0.000015", + "web_search": "0.01" + }, + "provider_display_name": "Anthropic", + "provider_info": { + "adapterName": "AnthropicMessageAdapter", + "baseUrl": "https://api.anthropic.com/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://www.anthropic.com/legal/privacy", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://www.anthropic.com/legal/commercial-terms", + "training": false + }, + "displayName": "Anthropic", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "US", + "icon": { + "url": "/images/icons/Anthropic.svg" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": true, + "name": "Anthropic", + "owners": ["{}"], + "slug": "anthropic", + "statusPageUrl": "https://status.anthropic.com/" + }, + "provider_model_id": "claude-opus-4-20250514", + "provider_name": "Anthropic", + "provider_region": null, + "provider_slug": "anthropic", + "quantization": "unknown", + "supported_parameters": [ + "max_tokens", + "top_p", + "temperature", + "stop", + "reasoning", + "include_reasoning", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["image", "text", "file"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Anthropic: Claude Opus 4", + "output_modalities": ["text"], + "permaslug": "anthropic/claude-4-opus-20250522", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, + "router": null, + "short_name": "Claude Opus 4", + "slug": "anthropic/claude-opus-4", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + { + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-08-05T16:33:11.634562+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", + "endpoint": { + "adapter_name": "AnthropicMessageAdapter", + "can_abort": true, + "context_length": 200000, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://www.anthropic.com/legal/privacy", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://www.anthropic.com/legal/commercial-terms", + "training": false + }, + "features": { + "supported_parameters": {}, + "supports_file_urls": true, + "supports_input_audio": false, + "supports_native_web_search": true, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "ca4e491c-208c-4bd0-b808-35e0ad56bc52", + "is_byok": false, + "is_deranked": true, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 32000, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-08-05T16:33:11.634562+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["image", "text", "file"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Anthropic: Claude Opus 4.1", + "output_modalities": ["text"], + "permaslug": "anthropic/claude-4.1-opus-20250805", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, + "router": null, + "short_name": "Claude Opus 4.1", + "slug": "anthropic/claude-opus-4.1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "anthropic/claude-4.1-opus-20250805", + "model_variant_slug": "anthropic/claude-opus-4.1", + "moderation_required": true, + "name": "Anthropic | anthropic/claude-4.1-opus-20250805", "pricing": { "completion": "0.000075", "discount": 0, - "image": "0.024", - "image_output": "0", "input_cache_read": "0.0000015", - "internal_reasoning": "0", "prompt": "0.000015", - "request": "0", "web_search": "0.01" }, "provider_display_name": "Anthropic", @@ -8353,12 +8500,12 @@ "has_completions": true, "id": "be883404-eb42-4b2d-b6e4-c7daa3aa8d62", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 75, + "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": 64000, "max_prompt_tokens": null, @@ -8402,7 +8549,7 @@ "router": null, "short_name": "Claude Opus 4.5", "slug": "anthropic/claude-opus-4.5", - "updated_at": "2025-12-05T21:53:06.931607+00:00", + "updated_at": "2026-01-15T17:57:03.680811+00:00", "warning_message": null }, "model_variant_permaslug": "anthropic/claude-4.5-opus-20251124", @@ -8412,12 +8559,8 @@ "pricing": { "completion": "0.000025", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.0000005", - "internal_reasoning": "0", "prompt": "0.000005", - "request": "0", "web_search": "0.01" }, "provider_display_name": "Anthropic", @@ -8500,7 +8643,7 @@ "router": null, "short_name": "Claude Opus 4.5", "slug": "anthropic/claude-opus-4.5", - "updated_at": "2025-12-05T21:53:06.931607+00:00", + "updated_at": "2026-01-15T17:57:03.680811+00:00", "warning_message": null }, { @@ -8601,12 +8744,8 @@ "pricing": { "completion": "0.000015", "discount": 0, - "image": "0.0048", - "image_output": "0", "input_cache_read": "0.0000003", - "internal_reasoning": "0", "prompt": "0.000003", - "request": "0", "web_search": "0.01" }, "provider_display_name": "Anthropic", @@ -8797,13 +8936,9 @@ "pricing": { "completion": "0.000015", "discount": 0, - "image": "0.0048", - "image_output": "0", "input_cache_read": "0.0000003", - "internal_reasoning": "0", "prompt": "0.000003", - "request": "0", - "web_search": "0" + "web_search": "0.01" }, "provider_display_name": "Anthropic", "provider_info": { @@ -8896,6 +9031,145 @@ "slug": "anthropic/claude-sonnet-4.5", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null + }, + { + "author": "Other", + "context_length": 200000, + "created_at": "2025-08-26T20:08:47.000Z", + "default_parameters": null, + "default_stops": [], + "default_system": null, + "description": "Free version of Claude Opus 4.6 for use in Kilo for Slack only", + "endpoint": { + "adapter_name": "other", + "can_abort": true, + "context_length": 200000, + "data_policy": { + "canPublish": false, + "retainsPrompts": true, + "training": true + }, + "features": null, + "has_chat_completions": true, + "has_completions": false, + "id": "anthropic/claude-opus-4.6:slackbot", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 32000, + "max_prompt_images": null, + "max_prompt_tokens": 200000, + "max_tokens_per_image": null, + "model": { + "author": "Other", + "context_length": 200000, + "created_at": "2025-08-26T20:08:47.000Z", + "default_parameters": null, + "default_stops": [], + "default_system": null, + "description": "Free version of Claude Opus 4.6 for use in Kilo for Slack only", + "features": null, + "group": "other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Claude Opus 4.6 (Free for Kilo for Slack)", + "output_modalities": ["text"], + "permaslug": "anthropic/claude-opus-4.6:slackbot", + "reasoning_config": null, + "router": null, + "short_name": "Claude Opus 4.6 (Free for Kilo for Slack)", + "slug": "anthropic/claude-opus-4.6:slackbot", + "updated_at": "2026-02-10T10:49:03.904Z", + "warning_message": null + }, + "model_variant_permaslug": "anthropic/claude-opus-4.6:slackbot", + "model_variant_slug": "anthropic/claude-opus-4.6:slackbot", + "moderation_required": false, + "name": "Claude Opus 4.6 (Free for Kilo for Slack)", + "pricing": { + "completion": "0.0000000", + "discount": 0, + "image": "0", + "image_output": "0", + "internal_reasoning": "0", + "prompt": "0.0000000", + "request": "0", + "web_search": "0" + }, + "provider_display_name": "Other", + "provider_info": { + "adapterName": "other", + "baseUrl": "https://kilo.ai", + "byokEnabled": false, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": true + }, + "displayName": "Other", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": false, + "headquarters": "Unknown", + "icon": { + "className": "rounded-sm", + "url": "https://via.placeholder.com/32x32/000000/FFFFFF?text=S" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Other", + "owners": [], + "slug": "other", + "statusPageUrl": null + }, + "provider_model_id": "anthropic/claude-opus-4.6:slackbot", + "provider_name": "Other", + "provider_region": null, + "provider_slug": "other", + "quantization": null, + "supported_parameters": [ + "max_tokens", + "temperature", + "tools", + "reasoning", + "include_reasoning" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "default" + }, + "features": null, + "group": "other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Claude Opus 4.6 (Free for Kilo for Slack)", + "output_modalities": ["text"], + "permaslug": "anthropic/claude-opus-4.6:slackbot", + "reasoning_config": null, + "router": null, + "short_name": "Claude Opus 4.6 (Free for Kilo for Slack)", + "slug": "anthropic/claude-opus-4.6", + "updated_at": "2026-02-10T10:49:03.904Z", + "warning_message": null } ], "name": "Anthropic", @@ -8913,7 +9187,352 @@ "icon": { "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.arcee.ai/&size=256" }, - "models": [], + "models": [ + { + "author": "arcee-ai", + "context_length": 131000, + "created_at": "2026-01-27T22:24:30+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.8 + }, + "default_stops": [], + "default_system": null, + "description": "Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing. \n\nIt excels in creative writing, storytelling, role-play, chat scenarios, and real-time voice assistance, better than your average reasoning model usually can. But we’re also introducing some of our newer agentic performance. It was trained to navigate well in agent harnesses like OpenCode, Cline, and Kilo Code, and to handle complex toolchains and long, constraint-filled prompts. \n\nThe architecture natively supports very long context windows up to 512k tokens, with the Preview API currently served at 128k context using 8-bit quantization for practical deployment. Trinity-Large-Preview reflects Arcee’s efficiency-first design philosophy, offering a production-oriented frontier model with open weights and permissive licensing suitable for real-world applications and experimentation.", + "endpoint": { + "adapter_name": "ClarifaiAdapter", + "can_abort": false, + "context_length": 131000, + "data_policy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "features": { + "disable_free_endpoint_limits": true, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": false, + "literal_required": false, + "type_function": false + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "239c00d5-c404-4d17-b243-6a645212dc95", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": 600, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "arcee-ai", + "context_length": 131000, + "created_at": "2026-01-27T22:24:30+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.8 + }, + "default_stops": [], + "default_system": null, + "description": "Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing. \n\nIt excels in creative writing, storytelling, role-play, chat scenarios, and real-time voice assistance, better than your average reasoning model usually can. But we’re also introducing some of our newer agentic performance. It was trained to navigate well in agent harnesses like OpenCode, Cline, and Kilo Code, and to handle complex toolchains and long, constraint-filled prompts. \n\nThe architecture natively supports very long context windows up to 512k tokens, with the Preview API currently served at 128k context using 8-bit quantization for practical deployment. Trinity-Large-Preview reflects Arcee’s efficiency-first design philosophy, offering a production-oriented frontier model with open weights and permissive licensing suitable for real-world applications and experimentation.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "arcee-ai/Trinity-Large-Preview", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Arcee AI: Trinity Large Preview", + "output_modalities": ["text"], + "permaslug": "arcee-ai/trinity-large-preview", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Trinity Large Preview", + "slug": "arcee-ai/trinity-large-preview", + "updated_at": "2026-01-28T00:57:40.622658+00:00", + "warning_message": null + }, + "model_variant_permaslug": "arcee-ai/trinity-large-preview:free", + "model_variant_slug": "arcee-ai/trinity-large-preview:free", + "moderation_required": false, + "name": "Arcee AI | arcee-ai/trinity-large-preview:free", + "pricing": { + "completion": "0", + "discount": 0, + "prompt": "0" + }, + "provider_display_name": "Arcee AI", + "provider_info": { + "adapterName": "ClarifaiAdapter", + "baseUrl": "https://api.pinference.ai/api/v1/", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Arcee AI", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": false, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.arcee.ai/&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Arcee AI", + "owners": [], + "slug": "arcee-ai", + "statusPageUrl": null + }, + "provider_model_id": "arcee-ai/trinity-large-preview", + "provider_name": "Arcee AI", + "provider_region": null, + "provider_slug": "arcee-ai", + "quantization": "unknown", + "supported_parameters": [ + "max_tokens", + "temperature", + "top_k", + "top_p", + "tools", + "structured_outputs", + "response_format" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "free" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "arcee-ai/Trinity-Large-Preview", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Arcee AI: Trinity Large Preview (free)", + "output_modalities": ["text"], + "permaslug": "arcee-ai/trinity-large-preview", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Trinity Large Preview (free)", + "slug": "arcee-ai/trinity-large-preview", + "updated_at": "2026-01-28T00:57:40.622658+00:00", + "warning_message": null + }, + { + "author": "arcee-ai", + "context_length": 131072, + "created_at": "2025-12-01T15:08:40+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.15, + "top_p": 0.75 + }, + "default_stops": [], + "default_system": null, + "description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function calling and multi-step agent workflows.", + "endpoint": { + "adapter_name": "ClarifaiAdapter", + "can_abort": false, + "context_length": 131072, + "data_policy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "features": { + "is_mandatory_reasoning": true, + "supports_input_audio": false, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": false, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "57299f47-f9cd-460d-b2fb-8480a99fe88e", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "arcee-ai", + "context_length": 131072, + "created_at": "2025-12-01T15:08:40+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.15, + "top_p": 0.75 + }, + "default_stops": [], + "default_system": null, + "description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function calling and multi-step agent workflows.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "arcee-ai/Trinity-Mini", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Arcee AI: Trinity Mini", + "output_modalities": ["text"], + "permaslug": "arcee-ai/trinity-mini-20251201", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "Trinity Mini", + "slug": "arcee-ai/trinity-mini", + "updated_at": "2026-01-08T19:23:52.555156+00:00", + "warning_message": null + }, + "model_variant_permaslug": "arcee-ai/trinity-mini-20251201:free", + "model_variant_slug": "arcee-ai/trinity-mini:free", + "moderation_required": false, + "name": "Arcee AI | arcee-ai/trinity-mini-20251201:free", + "pricing": { + "completion": "0", + "discount": 0, + "prompt": "0" + }, + "provider_display_name": "Arcee AI", + "provider_info": { + "adapterName": "ClarifaiAdapter", + "baseUrl": "https://api.clarifai.com/v2/ext/openai/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Arcee AI", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": false, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.arcee.ai/&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Arcee AI", + "owners": [], + "slug": "arcee-ai", + "statusPageUrl": null + }, + "provider_model_id": "arcee_ai/AFM/models/trinity-mini", + "provider_name": "Arcee AI", + "provider_region": null, + "provider_slug": "arcee-ai/bf16", + "quantization": "bf16", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_k", + "top_p", + "tool_choice", + "tools", + "structured_outputs", + "response_format" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "free" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "arcee-ai/Trinity-Mini", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Arcee AI: Trinity Mini (free)", + "output_modalities": ["text"], + "permaslug": "arcee-ai/trinity-mini-20251201", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "Trinity Mini (free)", + "slug": "arcee-ai/trinity-mini", + "updated_at": "2026-01-08T19:23:52.555156+00:00", + "warning_message": null + } + ], "name": "Arcee AI", "slug": "arcee-ai" }, @@ -8970,7 +9589,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -9007,12 +9626,8 @@ "pricing": { "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000216", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000018", + "prompt": "0.000000216" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -9195,7 +9810,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -9320,14 +9948,10 @@ "moderation_required": false, "name": "AtlasCloud | deepseek/deepseek-chat-v3.1", "pricing": { - "completion": "0.0000008", + "completion": "0.00000095", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000021", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000013", + "prompt": "0.0000003" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -9510,7 +10134,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -9662,14 +10299,10 @@ "moderation_required": false, "name": "AtlasCloud | deepseek/deepseek-v3.1-terminus", "pricing": { - "completion": "0.0000008", + "completion": "0.00000095", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000021", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000013", + "prompt": "0.0000003" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -9852,7 +10485,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -10006,12 +10652,8 @@ "pricing": { "completion": "0.00000038", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000026", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.00000026" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -10194,7 +10836,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -10350,14 +11005,10 @@ "moderation_required": false, "name": "AtlasCloud | deepseek/deepseek-v3.2-exp", "pricing": { - "completion": "0.00000032", + "completion": "0.00000041", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000021", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000027", + "prompt": "0.00000027" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -10540,7 +11191,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -10692,14 +11356,10 @@ "moderation_required": false, "name": "AtlasCloud | deepseek/deepseek-v3.2-speciale-20251201", "pricing": { - "completion": "0.0000004", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000028", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000002", + "prompt": "0.0000004" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -10882,7 +11542,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -11033,13 +11706,8 @@ "pricing": { "completion": "0.00000215", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000045", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000035", + "prompt": "0.00000055" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -11222,7 +11890,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -11378,13 +12059,8 @@ "pricing": { "completion": "0.000000828", "discount": 0.31, - "image": "0", - "image_output": "0", "input_cache_read": "0.0000000414", - "internal_reasoning": "0", - "prompt": "0.000000207", - "request": "0", - "web_search": "0" + "prompt": "0.000000207" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -11567,7 +12243,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -11666,7 +12355,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -11713,12 +12402,8 @@ "pricing": { "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000002", + "prompt": "0.0000002" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -11901,7 +12586,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -12050,13 +12748,8 @@ "pricing": { "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.000000255" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -12239,7 +12932,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -12396,15 +13102,10 @@ "moderation_required": false, "name": "AtlasCloud | minimax/minimax-m2.1", "pricing": { - "completion": "0.0000012", + "completion": "0.00000095", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000029" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -12587,7 +13288,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -12733,12 +13447,8 @@ "pricing": { "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000007", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000007", + "prompt": "0.0000007" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -12921,7 +13631,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -13056,12 +13779,8 @@ "pricing": { "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000006", + "prompt": "0.0000006" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -13244,7 +13963,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -13390,12 +14122,8 @@ "pricing": { "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000006", + "prompt": "0.0000006" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -13578,7 +14306,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -13640,9 +14381,9 @@ "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -13650,11 +14391,11 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { "adapter_name": "AtlasCloudAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.atlascloud.ai/privacy", @@ -13662,6 +14403,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -13671,7 +14413,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8b516897-c13e-4925-afc3-44fa86608fe9", + "id": "c1de3dab-1e75-4a69-9c51-fde39ae5bb7e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -13680,13 +14422,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -13694,50 +14436,44 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "AtlasCloud | openai/gpt-oss-120b", + "name": "AtlasCloud | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.0000002", + "completion": "0.0000026", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000089", + "prompt": "0.0000005" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -13920,7 +14656,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -13930,11 +14679,11 @@ "slug": "atlas-cloud", "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-120b", + "provider_model_id": "moonshotai/kimi-k2.5", "provider_name": "AtlasCloud", "provider_region": null, - "provider_slug": "atlas-cloud/fp8", - "quantization": "fp8", + "provider_slug": "atlas-cloud/int4", + "quantization": "int4", "supported_parameters": [ "reasoning", "include_reasoning", @@ -13951,49 +14700,47 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 128000, - "created_at": "2024-10-16T00:00:00+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { "adapter_name": "AtlasCloudAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.atlascloud.ai/privacy", @@ -14004,13 +14751,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, + "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": false, - "id": "e79550e7-ba8a-475a-92a9-2df99ee67d41", + "has_completions": true, + "id": "8b516897-c13e-4925-afc3-44fa86608fe9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -14019,64 +14766,60 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "openai", "context_length": 131072, - "created_at": "2024-10-16T00:00:00+00:00", + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen", + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-7B-Instruct", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 7B Instruct", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-7b-instruct", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen2.5 7B Instruct", - "slug": "qwen/qwen-2.5-7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2.5-7b-instruct", - "model_variant_slug": "qwen/qwen-2.5-7b-instruct", + "model_variant_permaslug": "openai/gpt-oss-120b", + "model_variant_slug": "openai/gpt-oss-120b", "moderation_required": false, - "name": "AtlasCloud | qwen/qwen-2.5-7b-instruct", + "name": "AtlasCloud | openai/gpt-oss-120b", "pricing": { - "completion": "0.0000001", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001", + "prompt": "0.0000001" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -14259,7 +15002,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -14269,59 +15025,70 @@ "slug": "atlas-cloud", "statusPageUrl": null }, - "provider_model_id": "qwen/qwen2.5-7b-instruct", + "provider_model_id": "openai/gpt-oss-120b", "provider_name": "AtlasCloud", "provider_region": null, "provider_slug": "atlas-cloud/fp8", "quantization": "fp8", - "supported_parameters": ["max_tokens", "temperature", "top_p", "tools", "tool_choice"], + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p" + ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen", + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-7B-Instruct", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 7B Instruct", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-7b-instruct", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen2.5 7B Instruct", - "slug": "qwen/qwen-2.5-7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 131072, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, - "default_stops": [], + "context_length": 128000, + "created_at": "2024-10-16T00:00:00+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "endpoint": { "adapter_name": "AtlasCloudAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.atlascloud.ai/privacy", @@ -14329,20 +15096,16 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, + "literal_required": false, "type_function": true } }, "has_chat_completions": true, - "has_completions": true, - "id": "aaae5602-0949-4bc9-bd77-588b4b285341", + "has_completions": false, + "id": "e79550e7-ba8a-475a-92a9-2df99ee67d41", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -14351,17 +15114,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, - "default_stops": [], + "context_length": 131072, + "created_at": "2024-10-16T00:00:00+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "features": { "reasoning_config": { "end_token": null, @@ -14369,42 +15136,37 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Qwen/Qwen2.5-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Qwen: Qwen2.5 7B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "qwen/qwen-2.5-7b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Qwen2.5 7B Instruct", + "slug": "qwen/qwen-2.5-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "qwen/qwen-2.5-7b-instruct", + "model_variant_slug": "qwen/qwen-2.5-7b-instruct", "moderation_required": false, - "name": "AtlasCloud | qwen/qwen3-235b-a22b-07-25", + "name": "AtlasCloud | qwen/qwen-2.5-7b-instruct", "pricing": { - "completion": "0.00000088", + "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000011", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000004", + "prompt": "0.00000004" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -14587,7 +15349,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -14597,7 +15372,7 @@ "slug": "atlas-cloud", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "provider_model_id": "qwen/qwen2.5-7b-instruct", "provider_name": "AtlasCloud", "provider_region": null, "provider_slug": "atlas-cloud/fp8", @@ -14616,44 +15391,40 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Qwen/Qwen2.5-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Qwen: Qwen2.5 7B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "qwen/qwen-2.5-7b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Qwen2.5 7B Instruct", + "slug": "qwen/qwen-2.5-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 128000, - "created_at": "2025-07-25T13:19:17.179049+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "context_length": 131072, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { "adapter_name": "AtlasCloudAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.atlascloud.ai/privacy", @@ -14661,7 +15432,10 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -14670,8 +15444,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "b77fd248-a8a9-42fb-b23b-242965422d1b", + "has_completions": true, + "id": "aaae5602-0949-4bc9-bd77-588b4b285341", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -14686,59 +15460,49 @@ "model": { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-25T13:19:17.179049+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", - "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "AtlasCloud | qwen/qwen3-235b-a22b-thinking-2507", + "name": "AtlasCloud | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.0000023", + "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000002", + "prompt": "0.0000002" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -14921,7 +15685,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -14931,58 +15708,51 @@ "slug": "atlas-cloud", "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-235b-a22b-thinking-2507", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "provider_name": "AtlasCloud", "provider_region": null, "provider_slug": "atlas-cloud/fp8", "quantization": "fp8", - "supported_parameters": [ - "reasoning", - "include_reasoning", - "max_tokens", - "temperature", - "top_p" - ], + "supported_parameters": ["max_tokens", "temperature", "top_p", "tools", "tool_choice"], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 32000, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "context_length": 128000, + "created_at": "2025-07-25T13:19:17.179049+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -14990,11 +15760,11 @@ }, "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "endpoint": { "adapter_name": "AtlasCloudAdapter", "can_abort": true, - "context_length": 32000, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.atlascloud.ai/privacy", @@ -15002,6 +15772,7 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -15011,7 +15782,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "ab215350-eea0-4c21-9b14-6f29c61f31ea", + "id": "b77fd248-a8a9-42fb-b23b-242965422d1b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -15020,13 +15791,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "context_length": 262144, + "created_at": "2025-07-25T13:19:17.179049+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -15034,7 +15805,7 @@ }, "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -15045,40 +15816,35 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", - "model_variant_slug": "qwen/qwen3-30b-a3b", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", "moderation_required": false, - "name": "AtlasCloud | qwen/qwen3-30b-a3b-04-28", + "name": "AtlasCloud | qwen/qwen3-235b-a22b-thinking-2507", "pricing": { - "completion": "0.00000045", + "completion": "0.0000023", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000002", + "prompt": "0.00000028" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -15261,7 +16027,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -15271,7 +16050,7 @@ "slug": "atlas-cloud", "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-30b-a3b", + "provider_model_id": "qwen/qwen3-235b-a22b-thinking-2507", "provider_name": "AtlasCloud", "provider_region": null, "provider_slug": "atlas-cloud/fp8", @@ -15281,15 +16060,11 @@ "include_reasoning", "max_tokens", "temperature", - "top_p", - "tools", - "tool_choice", - "structured_outputs", - "response_format" + "top_p" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -15303,24 +16078,24 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, { @@ -15411,12 +16186,8 @@ "pricing": { "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000008", + "prompt": "0.0000001" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -15599,7 +16370,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -15743,13 +16527,8 @@ "pricing": { "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000008", + "prompt": "0.00000008" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -15932,7 +16711,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -16073,14 +16865,10 @@ "moderation_required": false, "name": "AtlasCloud | qwen/qwen3-32b-04-28", "pricing": { - "completion": "0.00000045", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001", + "prompt": "0.0000001" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -16263,7 +17051,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -16409,15 +17210,10 @@ "moderation_required": false, "name": "AtlasCloud | qwen/qwen3-8b-04-28", "pricing": { - "completion": "0.00000025", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000005", + "prompt": "0.00000005" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -16600,7 +17396,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -16748,15 +17557,10 @@ "moderation_required": false, "name": "AtlasCloud | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { - "completion": "0.0000027", + "completion": "0.0000038", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000035", - "internal_reasoning": "0", - "prompt": "0.00000069", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000002", + "prompt": "0.00000078" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -16939,7 +17743,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -16958,7 +17775,15 @@ "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "completions": "0.0000067", + "input_cache_read": "0.0000002", + "prompt": "0.0000014", + "threshold": 32000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, "features": { @@ -17074,12 +17899,8 @@ "pricing": { "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.00000015" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -17262,7 +18083,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -17407,12 +18241,8 @@ "pricing": { "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.00000015" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -17595,7 +18425,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -17748,12 +18591,8 @@ "pricing": { "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000003", + "prompt": "0.0000003" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -17936,7 +18775,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -18092,12 +18944,8 @@ "pricing": { "completion": "0.00000045", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000009", + "prompt": "0.00000009" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -18280,7 +19128,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -18342,6 +19203,356 @@ "updated_at": "2026-01-10T14:09:25.267618+00:00", "warning_message": null }, + { + "author": "xiaomi", + "context_length": 262144, + "created_at": "2025-12-14T16:55:08+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": 0.95 + }, + "default_stops": [], + "default_system": "You are MiMo, an AI assistant developed by Xiaomi.\n\nYour knowledge cutoff date is December 2024.", + "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config).", + "endpoint": { + "adapter_name": "AtlasCloudAdapter", + "can_abort": true, + "context_length": 262144, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://www.atlascloud.ai/privacy", + "retainsPrompts": false, + "training": false + }, + "features": { + "reasoning_return_mechanism": "reasoning-content", + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": false, + "type_function": false + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "2350232d-2ce5-4a05-9a6f-61982b83c03c", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 65536, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "xiaomi", + "context_length": 262144, + "created_at": "2025-12-14T16:55:08+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": 0.95 + }, + "default_stops": [], + "default_system": "You are MiMo, an AI assistant developed by Xiaomi.\n\nYour knowledge cutoff date is December 2024.", + "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config).", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "XiaomiMiMo/MiMo-V2-Flash", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Xiaomi: MiMo-V2-Flash", + "output_modalities": ["text"], + "permaslug": "xiaomi/mimo-v2-flash-20251210", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "MiMo-V2-Flash", + "slug": "xiaomi/mimo-v2-flash", + "updated_at": "2026-01-21T16:26:04.702272+00:00", + "warning_message": null + }, + "model_variant_permaslug": "xiaomi/mimo-v2-flash-20251210", + "model_variant_slug": "xiaomi/mimo-v2-flash", + "moderation_required": false, + "name": "AtlasCloud | xiaomi/mimo-v2-flash-20251210", + "pricing": { + "completion": "0.0000003", + "discount": 0, + "prompt": "0.0000001" + }, + "provider_display_name": "AtlasCloud", + "provider_info": { + "adapterName": "AtlasCloudAdapter", + "baseUrl": "https://api.atlascloud.ai/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://www.atlascloud.ai/privacy", + "retainsPrompts": false, + "training": false + }, + "displayName": "AtlasCloud", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.atlascloud.ai/&size=256" + }, + "ignoredProviderModels": [ + "gemini-2.5-pro-thinking", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-flash-preview-05-20", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash", + "gemini-2.5-pro", + "gemini-2.5-flash-thinking", + "gemini-2.5-flash-lite", + "claude-opus-4-1-20250805-thinking", + "claude-opus-4-1-20250805", + "claude-opus-4-20250514-thinking", + "claude-opus-4-20250514", + "claude-sonnet-4-20250514-thinking", + "claude-sonnet-4-20250514", + "claude-3-7-sonnet-20250219-thinking", + "claude-3-7-sonnet-20250219", + "claude-3-5-sonnet-20241022", + "claude-3-5-haiku-20241022", + "gemini-2.5-flash-image-preview", + "gpt-4.1", + "gpt-4.1-mini", + "gpt-4o", + "gpt-4o-mini", + "gpt-5", + "gpt-5-nano", + "o1", + "o1-mini", + "o3", + "o3-mini", + "o4-mini", + "grok-3", + "grok-4", + "claude-sonnet-4-5-20250929-thinking", + "claude-sonnet-4-5-20250929", + "doubao-pro-32k", + "doubao-1-5-thinking-pro-250415", + "doubao-1-5-pro-256k-250115", + "doubao-seed-1-6-thinking-250715", + "doubao-seed-1-6-thinking-250615", + "doubao-pro-32k-241215", + "doubao-seed-1-6-flash-250615", + "doubao-lite-32k-character-250228", + "doubao-seed-1-6-250615", + "doubao-1-5-vision-pro-32k-250115", + "doubao-pro-32k-character-241215", + "doubao-1-5-vision-pro-250328", + "doubao-1-5-ui-tars-250428", + "doubao-1-5-thinking-vision-pro-250428", + "doubao-1-5-thinking-pro-m-250428", + "doubao-1.5-pro-256k", + "doubao-1.5-pro-32k", + "doubao-1-5-pro-32k-250115", + "claude-haiku-4-5-20251001-thinking", + "claude-haiku-4-5-20251001", + "gemini-2.5-flash-lite-preview-09-2025-enterprise", + "gemini-2.5-flash-enterprise", + "gemini-2.5-flash-preview-09-2025-enterprise", + "gemini-2.5-flash-lite-enterprise", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-lite-preview-09-2025", + "grok-3-mini-enterprise", + "grok-3-enterprise", + "google/gemini-3-pro-preview", + "OpenAI/gpt-5", + "OpenAI/gpt-5-pro", + "OpenAI/gpt-5-chat", + "OpenAI/gpt-5-codex", + "OpenAI/gpt-5-mini", + "OpenAI/gpt-5-nano", + "google/gemini-2.5-pro-developer", + "google/gemini-2.5-pro-preview-06-05-developer", + "google/gemini-2.5-flash-preview-05-20-developer", + "google/gemini-2.5-flash-lite-preview-06-17-developer", + "google/gemini-2.5-flash-developer", + "google/gemini-2.5-flash-lite-developer", + "xai/grok-3-mini", + "xai/grok-3", + "anthropic/claude-haiku-4.5-20251001-thinking-developer", + "anthropic/claude-haiku-4.5-20251001-developer", + "xai/grok-4-developer", + "google/gemini-2.5-flash-image-preview-developer", + "google/gemini-2.5-flash-preview-202509-developer", + "google/gemini-2.5-flash", + "google/gemini-2.5-flash-preview-202509", + "google/gemini-2.5-flash-lite", + "google/gemini-2.5-flash-lite-preview-202509", + "google/gemini-2.5-flash-lite-preview-202509-developer", + "anthropic/claude-sonnet-4-20250514-thinking-developer", + "anthropic/claude-sonnet-4-20250514-developer", + "anthropic/claude-sonnet-4.5-20250929-developer", + "google/gemini-2.5-pro-preview-0605-developer", + "google/gemini-2.5-flash-preview-0520-developer", + "google/gemini-2.5-flash-lite-preview-0617-developer", + "xai/grok-3-developer", + "anthropic/claude-sonnet-4.5-20250929-thinking-developer", + "anthropic/claude-sonnet-3.7-20250219-thinking-developer", + "anthropic/claude-sonnet-3.7-20250219-developer", + "anthropic/claude-sonnet-3.5-20241022-developer", + "anthropic/claude-haiku-3.5-20241022-developer", + "openai/gpt-5.1", + "openai/gpt-5.1-chat", + "openai/gpt-5.1-codex", + "openai/gpt-5.1-codex-mini", + "google/gemini-3-pro-preview-thinking-developer", + "google/gemini-3-pro-preview-developer", + "openai/gpt-5-developer", + "openai/gpt-5-nano-developer", + "openai/gpt-4o-developer", + "openai/gpt-4o-mini-developer", + "openai/o1-developer", + "openai/o1-mini-developer", + "openai/o3-developer", + "openai/o3-mini-developer", + "openai/o4-mini-developer", + "openai/gpt-4.1-developer", + "openai/gpt-4.1-mini-developer", + "openai/o3-pro", + "openai/o4-mini", + "openai/o3-mini", + "openai/o3", + "openai/o1", + "openai/gpt-4.1", + "openai/gpt-4.1-mini", + "openai/gpt-4.1-nano", + "openai/gpt-4o", + "openai/gpt-4o-mini", + "anthropic/claude-opus-4.5-20251101-developer", + "openai/gpt-5.1-codex-developer", + "openai/gpt-5.1-chat-developer", + "openai/gpt-5.1-developer", + "anthropic/claude-sonnet-4-20250514", + "anthropic/claude-haiku-4.5-20251001", + "anthropic/claude-sonnet-4.5-20250929", + "anthropic/claude-opus-4.1-20250805", + "anthropic/claude-3.7-sonnet-20250219", + "anthropic/claude-opus-4-20250514", + "openai/gpt-5.1-codex-mini-developer", + "openai/gpt-5-pro-developer", + "openai/gpt-5-mini-developer", + "openai/gpt-5-codex-developer", + "anthropic/claude-opus-4-20250514-developer", + "openai/gpt-4.1-nano-developer", + "kwaipilot/kat-coder-air", + "kwaipilot/kat-coder-exp-72b-1010", + "openai/gpt-5", + "openai/gpt-5-chat", + "openai/gpt-5-codex", + "openai/gpt-5-mini", + "openai/gpt-5-nano", + "openai/gpt-5-pro", + "openai/gpt-5.1-codex-max", + "openai/gpt-5.2", + "openai/gpt-5.2-chat", + "google/gemini-2.5-pro", + "openai/gpt-5.2-developer", + "anthropic/claude-opus-4.5-20251101", + "openai/gpt-image-1-developer", + "google/gemini-3-flash-preview-developer", + "google/gemini-3-flash-preview", + "google/gemini-2.0-flash", + "google/gemini-2.0-flash-lite", + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "AtlasCloud", + "owners": ["{}"], + "slug": "atlas-cloud", + "statusPageUrl": null + }, + "provider_model_id": "xiaomi/mimo-v2-flash", + "provider_name": "AtlasCloud", + "provider_region": null, + "provider_slug": "atlas-cloud/fp8", + "quantization": "fp8", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "XiaomiMiMo/MiMo-V2-Flash", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Xiaomi: MiMo-V2-Flash", + "output_modalities": ["text"], + "permaslug": "xiaomi/mimo-v2-flash-20251210", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "MiMo-V2-Flash", + "slug": "xiaomi/mimo-v2-flash", + "updated_at": "2026-01-21T16:26:04.702272+00:00", + "warning_message": null + }, { "author": "z-ai", "context_length": 202752, @@ -18365,6 +19576,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supported_parameters": { "response_format": true, "structured_outputs": true @@ -18439,13 +19651,8 @@ "pricing": { "completion": "0.00000174", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000088", - "internal_reasoning": "0", - "prompt": "0.00000044", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000044", + "prompt": "0.00000044" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -18628,7 +19835,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -18782,15 +20002,10 @@ "moderation_required": false, "name": "AtlasCloud | z-ai/glm-4.7-20251222", "pricing": { - "completion": "0.00000174", + "completion": "0.00000175", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000044", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000012", + "prompt": "0.00000052" }, "provider_display_name": "AtlasCloud", "provider_info": { @@ -18973,7 +20188,20 @@ "google/gemini-3-flash-preview", "google/gemini-2.0-flash", "google/gemini-2.0-flash-lite", - "deepseek-ai/deepseek-ocr" + "deepseek-ai/deepseek-ocr", + "google/gemini-2.5-flash-image", + "google/gemini-3-pro-image-preview", + "xai/grok-4.1-fast-non-reasoning-developer", + "xai/grok-4.1-fast-reasoning-developer", + "xai/grok-4-fast-non-reasoning-developer", + "xai/grok-4-fast-reasoning-developer", + "xai/grok-4-0709", + "xai/grok-4-fast-reasoning", + "xai/grok-4-fast-non-reasoning", + "xai/grok-4.1-fast-reasoning", + "xai/grok-4.1-fast-non-reasoning", + "qwen/qwen3-max-2026-01-23", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, @@ -19132,12 +20360,7 @@ "pricing": { "completion": "0.00000594", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000001485", - "request": "0", - "web_search": "0" + "prompt": "0.000001485" }, "provider_display_name": "Azure", "provider_info": { @@ -19292,12 +20515,7 @@ "pricing": { "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "Azure", "provider_info": { @@ -19443,12 +20661,7 @@ "pricing": { "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "prompt": "0.000001" }, "provider_display_name": "Azure", "provider_info": { @@ -19600,12 +20813,7 @@ "pricing": { "completion": "0.000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "prompt": "0.000003" }, "provider_display_name": "Azure", "provider_info": { @@ -19757,12 +20965,7 @@ "pricing": { "completion": "0.00006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00003", - "request": "0", - "web_search": "0" + "prompt": "0.00003" }, "provider_display_name": "Azure", "provider_info": { @@ -19916,13 +21119,8 @@ "pricing": { "completion": "0.000008", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.0000005", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0" + "prompt": "0.000002" }, "provider_display_name": "Azure", "provider_info": { @@ -20088,13 +21286,8 @@ "pricing": { "completion": "0.0000016", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.0000001", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, "provider_display_name": "Azure", "provider_info": { @@ -20260,13 +21453,8 @@ "pricing": { "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Azure", "provider_info": { @@ -20412,12 +21600,7 @@ "pricing": { "completion": "0.00001", "discount": 0, - "image": "0.003613", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0", - "web_search": "0" + "prompt": "0.0000025" }, "provider_display_name": "Azure", "provider_info": { @@ -20586,12 +21769,7 @@ "pricing": { "completion": "0.000015", "discount": 0, - "image": "0.007225", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000005", - "request": "0", - "web_search": "0" + "prompt": "0.000005" }, "provider_display_name": "Azure", "provider_info": { @@ -20760,13 +21938,8 @@ "pricing": { "completion": "0.00001", "discount": 0, - "image": "0.003613", - "image_output": "0", "input_cache_read": "0.00000125", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0", - "web_search": "0" + "prompt": "0.0000025" }, "provider_display_name": "Azure", "provider_info": { @@ -20935,13 +22108,8 @@ "pricing": { "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.000000075", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Azure", "provider_info": { @@ -21136,13 +22304,8 @@ "pricing": { "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.00000125" }, "provider_display_name": "Azure", "provider_info": { @@ -21314,13 +22477,8 @@ "pricing": { "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, "provider_display_name": "Azure", "provider_info": { @@ -21489,13 +22647,8 @@ "pricing": { "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000001", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000005" }, "provider_display_name": "Azure", "provider_info": { @@ -21677,13 +22830,8 @@ "pricing": { "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000013", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.00000125" }, "provider_display_name": "Azure", "provider_info": { @@ -21866,13 +23014,8 @@ "pricing": { "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000013", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.00000125" }, "provider_display_name": "Azure", "provider_info": { @@ -22053,13 +23196,8 @@ "pricing": { "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000013", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.00000125" }, "provider_display_name": "Azure", "provider_info": { @@ -22242,13 +23380,8 @@ "pricing": { "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.00000125" }, "provider_display_name": "Azure", "provider_info": { @@ -22431,13 +23564,8 @@ "pricing": { "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, "provider_display_name": "Azure", "provider_info": { @@ -22620,13 +23748,8 @@ "pricing": { "completion": "0.000014", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.000000175", - "internal_reasoning": "0", - "prompt": "0.00000175", - "request": "0", - "web_search": "0" + "prompt": "0.00000175" }, "provider_display_name": "Azure", "provider_info": { @@ -22809,13 +23932,8 @@ "pricing": { "completion": "0.000014", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.000000175", - "internal_reasoning": "0", - "prompt": "0.00000175", - "request": "0", - "web_search": "0" + "prompt": "0.00000175" }, "provider_display_name": "Azure", "provider_info": { @@ -22990,12 +24108,7 @@ "pricing": { "completion": "0.00000077", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000077", - "request": "0", - "web_search": "0" + "prompt": "0.00000077" }, "provider_display_name": "Baseten", "provider_info": { @@ -23067,21 +24180,21 @@ "warning_message": null }, { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-12-01T13:10:42.818885+00:00", + "author": "moonshotai", + "context_length": 262000, + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { "adapter_name": "BasetenAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.baseten.co/privacy-policy", @@ -23090,6 +24203,7 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, @@ -23100,7 +24214,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "40652a84-d35f-442b-9e1c-5f91dbea6ca9", + "id": "76eb9fc3-519d-4339-92aa-017dbef8cec4", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -23109,21 +24223,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 163800, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-12-01T13:10:42.818885+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -23132,41 +24246,36 @@ "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", - "model_variant_slug": "deepseek/deepseek-v3.2", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "BaseTen | deepseek/deepseek-v3.2-20251201", + "name": "BaseTen | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.00000045", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Baseten", "provider_info": { @@ -23197,7 +24306,7 @@ "slug": "baseten", "statusPageUrl": "https://status.baseten.co/" }, - "provider_model_id": "deepseek-ai/DeepSeek-V3.2", + "provider_model_id": "moonshotai/Kimi-K2-Thinking", "provider_name": "BaseTen", "provider_region": null, "provider_slug": "baseten/fp4", @@ -23207,11 +24316,9 @@ "include_reasoning", "max_tokens", "temperature", - "top_p", "stop", "response_format", "structured_outputs", - "top_k", "tools", "tool_choice" ], @@ -23229,40 +24336,44 @@ "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "moonshotai", - "context_length": 262000, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, + "context_length": 231000, + "created_at": "2026-01-27T04:11:16+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { "adapter_name": "BasetenAdapter", "can_abort": true, - "context_length": 262000, + "context_length": 231000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.baseten.co/privacy-policy", @@ -23271,20 +24382,18 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "reasoning_return_mechanism": "reasoning-content", + "supports_multipart": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": false, - "id": "80f48a1f-20f6-4f6d-a212-332a66ab2d96", + "id": "6b68e10a-44e0-4815-9b96-52f34335f83d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -23293,59 +24402,57 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 163800, + "max_completion_tokens": 231000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "moonshotai", "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, + "created_at": "2026-01-27T04:11:16+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "BaseTen | moonshotai/kimi-k2-0905", + "name": "BaseTen | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.0000025", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Baseten", "provider_info": { @@ -23376,188 +24483,7 @@ "slug": "baseten", "statusPageUrl": "https://status.baseten.co/" }, - "provider_model_id": "moonshotai/Kimi-K2-Instruct-0905", - "provider_name": "BaseTen", - "provider_region": null, - "provider_slug": "baseten/fp4", - "quantization": "fp4", - "supported_parameters": [ - "structured_outputs", - "response_format", - "max_tokens", - "temperature", - "stop", - "tools", - "tool_choice" - ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", - "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "moonshotai", - "context_length": 262000, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], - "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", - "endpoint": { - "adapter_name": "BasetenAdapter", - "can_abort": true, - "context_length": 262000, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://www.baseten.co/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.baseten.co/terms-and-conditions", - "training": false - }, - "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": false, - "id": "76eb9fc3-519d-4339-92aa-017dbef8cec4", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 163800, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], - "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", - "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, - "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", - "warning_message": null - }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", - "moderation_required": false, - "name": "BaseTen | moonshotai/kimi-k2-thinking-20251106", - "pricing": { - "completion": "0.0000025", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "Baseten", - "provider_info": { - "adapterName": "BasetenAdapter", - "baseUrl": "https://inference.baseten.co/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://www.baseten.co/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.baseten.co/terms-and-conditions", - "training": false - }, - "displayName": "Baseten", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "US", - "icon": { - "url": "/images/icons/baseten-favicon.svg" - }, - "ignoredProviderModels": ["deepseek-ai/DeepSeek-R1"], - "isAbortable": true, - "isMultipartSupported": false, - "moderationRequired": false, - "name": "BaseTen", - "owners": ["{}"], - "slug": "baseten", - "statusPageUrl": "https://status.baseten.co/" - }, - "provider_model_id": "moonshotai/Kimi-K2-Thinking", + "provider_model_id": "moonshotai/Kimi-K2.5", "provider_name": "BaseTen", "provider_region": null, "provider_slug": "baseten/fp4", @@ -23582,31 +24508,29 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { @@ -23708,12 +24632,7 @@ "pricing": { "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Baseten", "provider_info": { @@ -23829,7 +24748,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "66a338ab-559e-4807-ae88-3aeb6196d1b9", + "id": "7806a710-f20a-446c-9ef2-181ca46c1991", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -23878,19 +24797,14 @@ "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25:exacto", - "model_variant_slug": "qwen/qwen3-coder:exacto", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "model_variant_slug": "qwen/qwen3-coder", "moderation_required": false, - "name": "BaseTen | qwen/qwen3-coder-480b-a35b-07-25:exacto", + "name": "BaseTen | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { "completion": "0.00000153", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000038", - "request": "0", - "web_search": "0" + "prompt": "0.00000038" }, "provider_display_name": "Baseten", "provider_info": { @@ -23940,7 +24854,7 @@ "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "exacto" + "variant": "standard" }, "features": { "reasoning_config": { @@ -23957,7 +24871,7 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B (exacto)", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { @@ -23966,7 +24880,7 @@ "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B (exacto)", + "short_name": "Qwen3 Coder 480B A35B", "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null @@ -23995,6 +24909,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supported_parameters": { "response_format": true, "structured_outputs": true @@ -24069,12 +24984,7 @@ "pricing": { "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Baseten", "provider_info": { @@ -24261,7 +25171,7 @@ "router": null, "short_name": "FLUX.2 Flex", "slug": "black-forest-labs/flux.2-flex", - "updated_at": "2025-11-25T16:34:07.604557+00:00", + "updated_at": "2026-02-07T03:53:01.136158+00:00", "warning_message": null }, "model_variant_permaslug": "black-forest-labs/flux.2-flex", @@ -24269,14 +25179,10 @@ "moderation_required": false, "name": "Black Forest Labs | black-forest-labs/flux.2-flex", "pricing": { - "completion": "0.00001464", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00001464", - "request": "0", - "web_search": "0" + "image_output": "0.0000146484375", + "prompt": "0" }, "provider_display_name": "Black Forest Labs", "provider_info": { @@ -24345,13 +25251,13 @@ "router": null, "short_name": "FLUX.2 Flex", "slug": "black-forest-labs/flux.2-flex", - "updated_at": "2025-11-25T16:34:07.604557+00:00", + "updated_at": "2026-02-07T03:53:01.136158+00:00", "warning_message": null }, { "author": "black-forest-labs", - "context_length": 46864, - "created_at": "2025-12-16T03:59:30.221034+00:00", + "context_length": 40960, + "created_at": "2026-01-14T22:20:28.450361+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -24359,11 +25265,11 @@ }, "default_stops": [], "default_system": null, - "description": "FLUX.2 [max] is the new top-tier image model from Black Forest Labs, pushing image quality, prompt understanding, and editing consistency to the highest level yet.\n\nPricing is as follows, [per the docs](https://bfl.ai/pricing?category=flux.2):\nInput: We charge $0.03 for each megapixel on the input (i.e. reference images for editing)\nOutput: The first generated megapixel is charged $0.07. Each subsequent megapixel is charged $0.03.", + "description": "FLUX.2 [klein] 4B is the fastest and most cost-effective model in the FLUX.2 family, optimized for high-throughput use cases while maintaining excellent image quality.\n\nPricing is based on the output image. The first generated megapixel is charged $0.014. Each subsequent megapixel is charged $0.001.", "endpoint": { "adapter_name": "BlackForestLabsAdapter", "can_abort": false, - "context_length": 46864, + "context_length": 40960, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://bfl.ai/legal/privacy-policy", @@ -24383,7 +25289,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "79c839a0-5da8-4cc4-8324-dab2dd6893b0", + "id": "9e2e48d5-2016-4e18-976a-c463146c7f9c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -24397,8 +25303,8 @@ "max_tokens_per_image": null, "model": { "author": "black-forest-labs", - "context_length": 46864, - "created_at": "2025-12-16T03:59:30.221034+00:00", + "context_length": 40960, + "created_at": "2026-01-14T22:20:28.450361+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -24406,15 +25312,12 @@ }, "default_stops": [], "default_system": null, - "description": "FLUX.2 [max] is the new top-tier image model from Black Forest Labs, pushing image quality, prompt understanding, and editing consistency to the highest level yet.\n\nPricing is as follows, [per the docs](https://bfl.ai/pricing?category=flux.2):\nInput: We charge $0.03 for each megapixel on the input (i.e. reference images for editing)\nOutput: The first generated megapixel is charged $0.07. Each subsequent megapixel is charged $0.03.", + "description": "FLUX.2 [klein] 4B is the fastest and most cost-effective model in the FLUX.2 family, optimized for high-throughput use cases while maintaining excellent image quality.\n\nPricing is based on the output image. The first generated megapixel is charged $0.014. Each subsequent megapixel is charged $0.001.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, "group": "Other", @@ -24425,38 +25328,202 @@ "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Black Forest Labs: FLUX.2 Max", + "name": "Black Forest Labs: FLUX.2 Klein 4B", "output_modalities": ["image"], - "permaslug": "black-forest-labs/flux.2-max", + "permaslug": "black-forest-labs/flux.2-klein-4b", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "FLUX.2 Max", - "slug": "black-forest-labs/flux.2-max", - "updated_at": "2025-12-16T17:17:13.934433+00:00", + "short_name": "FLUX.2 Klein 4B", + "slug": "black-forest-labs/flux.2-klein-4b", + "updated_at": "2026-02-07T03:52:49.267303+00:00", "warning_message": null }, - "model_variant_permaslug": "black-forest-labs/flux.2-max", - "model_variant_slug": "black-forest-labs/flux.2-max", + "model_variant_permaslug": "black-forest-labs/flux.2-klein-4b", + "model_variant_slug": "black-forest-labs/flux.2-klein-4b", "moderation_required": false, - "name": "Black Forest Labs | black-forest-labs/flux.2-max", + "name": "Black Forest Labs | black-forest-labs/flux.2-klein-4b", "pricing": { - "completion": "0.00000732", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000732", - "request": "0", - "web_search": "0" + "image_output": "0.00000341796875", + "prompt": "0" }, "provider_display_name": "Black Forest Labs", "provider_info": { "adapterName": "BlackForestLabsAdapter", - "baseUrl": "https://api.us3.bfl.ai/v1", + "baseUrl": "https://api.bfl.ai/v1", + "byokEnabled": false, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://bfl.ai/legal/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://bfl.ai/legal/terms-of-service", + "training": false + }, + "displayName": "Black Forest Labs", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": false, + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://bfl.ai&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Black Forest Labs", + "owners": ["{}"], + "slug": "black-forest-labs", + "statusPageUrl": null + }, + "provider_model_id": "flux-2-klein-4b", + "provider_name": "Black Forest Labs", + "provider_region": null, + "provider_slug": "black-forest-labs", + "quantization": "unknown", + "supported_parameters": ["seed"], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": false, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": false, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Black Forest Labs: FLUX.2 Klein 4B", + "output_modalities": ["image"], + "permaslug": "black-forest-labs/flux.2-klein-4b", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "FLUX.2 Klein 4B", + "slug": "black-forest-labs/flux.2-klein-4b", + "updated_at": "2026-02-07T03:52:49.267303+00:00", + "warning_message": null + }, + { + "author": "black-forest-labs", + "context_length": 46864, + "created_at": "2025-12-16T03:59:30.221034+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "FLUX.2 [max] is the new top-tier image model from Black Forest Labs, pushing image quality, prompt understanding, and editing consistency to the highest level yet.\n\nPricing is as follows, [per the docs](https://bfl.ai/pricing?category=flux.2):\nInput: We charge $0.03 for each megapixel on the input (i.e. reference images for editing)\nOutput: The first generated megapixel is charged $0.07. Each subsequent megapixel is charged $0.03.", + "endpoint": { + "adapter_name": "BlackForestLabsAdapter", + "can_abort": false, + "context_length": 46864, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://bfl.ai/legal/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://bfl.ai/legal/terms-of-service", + "training": false + }, + "features": { + "supports_input_audio": false, + "supports_multipart": true, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "79c839a0-5da8-4cc4-8324-dab2dd6893b0", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "black-forest-labs", + "context_length": 46864, + "created_at": "2025-12-16T03:59:30.221034+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "FLUX.2 [max] is the new top-tier image model from Black Forest Labs, pushing image quality, prompt understanding, and editing consistency to the highest level yet.\n\nPricing is as follows, [per the docs](https://bfl.ai/pricing?category=flux.2):\nInput: We charge $0.03 for each megapixel on the input (i.e. reference images for editing)\nOutput: The first generated megapixel is charged $0.07. Each subsequent megapixel is charged $0.03.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": false, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Black Forest Labs: FLUX.2 Max", + "output_modalities": ["image"], + "permaslug": "black-forest-labs/flux.2-max", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, + "router": null, + "short_name": "FLUX.2 Max", + "slug": "black-forest-labs/flux.2-max", + "updated_at": "2026-02-07T03:52:55.719201+00:00", + "warning_message": null + }, + "model_variant_permaslug": "black-forest-labs/flux.2-max", + "model_variant_slug": "black-forest-labs/flux.2-max", + "moderation_required": false, + "name": "Black Forest Labs | black-forest-labs/flux.2-max", + "pricing": { + "completion": "0", + "discount": 0, + "image_output": "0.00001708984375", + "prompt": "0" + }, + "provider_display_name": "Black Forest Labs", + "provider_info": { + "adapterName": "BlackForestLabsAdapter", + "baseUrl": "https://api.us3.bfl.ai/v1", "byokEnabled": false, "dataPolicy": { "canPublish": false, @@ -24522,7 +25589,7 @@ "router": null, "short_name": "FLUX.2 Max", "slug": "black-forest-labs/flux.2-max", - "updated_at": "2025-12-16T17:17:13.934433+00:00", + "updated_at": "2026-02-07T03:52:55.719201+00:00", "warning_message": null }, { @@ -24611,7 +25678,7 @@ "router": null, "short_name": "FLUX.2 Pro", "slug": "black-forest-labs/flux.2-pro", - "updated_at": "2025-11-25T16:34:12.266076+00:00", + "updated_at": "2026-02-07T03:53:05.5544+00:00", "warning_message": null }, "model_variant_permaslug": "black-forest-labs/flux.2-pro", @@ -24619,14 +25686,10 @@ "moderation_required": false, "name": "Black Forest Labs | black-forest-labs/flux.2-pro", "pricing": { - "completion": "0.00000366", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000366", - "request": "0", - "web_search": "0" + "image_output": "0.00000732421875", + "prompt": "0" }, "provider_display_name": "Black Forest Labs", "provider_info": { @@ -24695,7 +25758,7 @@ "router": null, "short_name": "FLUX.2 Pro", "slug": "black-forest-labs/flux.2-pro", - "updated_at": "2025-11-25T16:34:12.266076+00:00", + "updated_at": "2026-02-07T03:53:05.5544+00:00", "warning_message": null } ], @@ -24716,7 +25779,7 @@ "models": [ { "author": "meta-llama", - "context_length": 32000, + "context_length": 32768, "created_at": "2024-07-23T00:00:00+00:00", "default_parameters": {}, "default_stops": ["<|eot_id|>", "<|end_of_text|>"], @@ -24725,7 +25788,7 @@ "endpoint": { "adapter_name": "CerebrasAdapter", "can_abort": true, - "context_length": 32000, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.cerebras.ai/privacy-policy", @@ -24755,7 +25818,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32000, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -24792,12 +25855,8 @@ "pricing": { "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001", + "prompt": "0.0000001" }, "provider_display_name": "Cerebras", "provider_info": { @@ -24848,7 +25907,8 @@ "logprobs", "top_logprobs", "tools", - "tool_choice" + "tool_choice", + "structured_outputs" ], "supports_multipart": false, "supports_reasoning": false, @@ -24917,7 +25977,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32000, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -24954,12 +26014,8 @@ "pricing": { "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000085", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000085", + "prompt": "0.00000085" }, "provider_display_name": "Cerebras", "provider_info": { @@ -25086,7 +26142,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 40960, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -25138,12 +26194,8 @@ "pricing": { "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000035", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000035", + "prompt": "0.00000035" }, "provider_display_name": "Cerebras", "provider_info": { @@ -25182,8 +26234,8 @@ "provider_model_id": "gpt-oss-120b", "provider_name": "Cerebras", "provider_region": null, - "provider_slug": "cerebras", - "quantization": "unknown", + "provider_slug": "cerebras/fp16", + "quantization": "fp16", "supported_parameters": [ "reasoning", "include_reasoning", @@ -25277,7 +26329,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 40960, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -25324,12 +26376,8 @@ "pricing": { "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000006", + "prompt": "0.0000006" }, "provider_display_name": "Cerebras", "provider_info": { @@ -25368,8 +26416,8 @@ "provider_model_id": "qwen-3-235b-a22b-instruct-2507", "provider_name": "Cerebras", "provider_region": null, - "provider_slug": "cerebras", - "quantization": "unknown", + "provider_slug": "cerebras/fp16", + "quantization": "fp16", "supported_parameters": [ "structured_outputs", "response_format", @@ -25460,7 +26508,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -25505,12 +26553,8 @@ "pricing": { "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000004", + "prompt": "0.0000004" }, "provider_display_name": "Cerebras", "provider_info": { @@ -25549,8 +26593,8 @@ "provider_model_id": "qwen-3-32b", "provider_name": "Cerebras", "provider_region": null, - "provider_slug": "cerebras", - "quantization": "unknown", + "provider_slug": "cerebras/fp16", + "quantization": "fp16", "supported_parameters": [ "reasoning", "include_reasoning", @@ -25602,15 +26646,15 @@ { "author": "z-ai", "context_length": 131072, - "created_at": "2025-09-30T12:32:56.306946+00:00", + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { "adapter_name": "CerebrasAdapter", "can_abort": true, @@ -25623,16 +26667,17 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning", "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, + "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "3464acc4-5bb0-4bb9-838a-574953b7fbff", + "id": "b44cc4d3-f68a-4565-8a0c-0dd31f03b687", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -25647,15 +26692,15 @@ "model": { "author": "z-ai", "context_length": 200000, - "created_at": "2025-09-30T12:32:56.306946+00:00", + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -25666,39 +26711,35 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6", - "model_variant_slug": "z-ai/glm-4.6", + "model_variant_permaslug": "z-ai/glm-4.7-20251222", + "model_variant_slug": "z-ai/glm-4.7", "moderation_required": false, - "name": "Cerebras | z-ai/glm-4.6", + "name": "Cerebras | z-ai/glm-4.7-20251222", "pricing": { "completion": "0.00000275", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000225", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000225", + "prompt": "0.00000225" }, "provider_display_name": "Cerebras", "provider_info": { @@ -25734,11 +26775,11 @@ "slug": "cerebras", "statusPageUrl": null }, - "provider_model_id": "zai-glm-4.6", + "provider_model_id": "zai-glm-4.7", "provider_name": "Cerebras", "provider_region": null, - "provider_slug": "cerebras", - "quantization": "unknown", + "provider_slug": "cerebras/fp16", + "quantization": "fp16", "supported_parameters": [ "reasoning", "include_reasoning", @@ -25749,9 +26790,10 @@ "seed", "logprobs", "top_logprobs", - "tool_choice", "tools", - "response_format" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": false, "supports_reasoning": true, @@ -25769,24 +26811,24 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null } ], @@ -25879,12 +26921,8 @@ "pricing": { "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.0000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -25931,7 +26969,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -26062,12 +27106,8 @@ "pricing": { "completion": "0.00000087", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000019", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000095", + "prompt": "0.00000019" }, "provider_display_name": "Chutes", "provider_info": { @@ -26114,7 +27154,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -26140,11 +27186,13 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, @@ -26253,12 +27301,8 @@ "pricing": { "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001", + "prompt": "0.0000002" }, "provider_display_name": "Chutes", "provider_info": { @@ -26305,7 +27349,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -26469,12 +27519,8 @@ "pricing": { "completion": "0.0000009", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000023", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000115", + "prompt": "0.00000023" }, "provider_display_name": "Chutes", "provider_info": { @@ -26521,7 +27567,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -26682,12 +27734,8 @@ "pricing": { "completion": "0.00000038", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000125", + "prompt": "0.00000025" }, "provider_display_name": "Chutes", "provider_info": { @@ -26734,7 +27782,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -26896,12 +27950,8 @@ "pricing": { "completion": "0.00000041", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000135", + "prompt": "0.00000027" }, "provider_display_name": "Chutes", "provider_info": { @@ -26948,7 +27998,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -27014,6 +28070,225 @@ "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, + { + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_system": null, + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "endpoint": { + "adapter_name": "ChutesAdapter", + "can_abort": true, + "context_length": 163840, + "data_policy": { + "canPublish": false, + "retainsPrompts": true, + "termsOfServiceURL": "https://chutes.ai/tos", + "training": true + }, + "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "40d2d4ad-ef95-4cc7-a103-2f7f414bd061", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 65536, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_system": null, + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "deepseek-r1", + "model_version_group_id": null, + "name": "DeepSeek: R1 0528", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-r1-0528", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", + "warning_message": null + }, + "model_variant_permaslug": "deepseek/deepseek-r1-0528", + "model_variant_slug": "deepseek/deepseek-r1-0528", + "moderation_required": false, + "name": "Chutes | deepseek/deepseek-r1-0528", + "pricing": { + "completion": "0.00000175", + "discount": 0, + "input_cache_read": "0.0000002", + "prompt": "0.0000004" + }, + "provider_display_name": "Chutes", + "provider_info": { + "adapterName": "ChutesAdapter", + "baseUrl": "https://llm.chutes.ai/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "termsOfServiceURL": "https://chutes.ai/tos", + "training": true + }, + "displayName": "Chutes", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://chutes.ai/&size=256" + }, + "ignoredProviderModels": [ + "openbmb/MiniCPM4-8B", + "agentica-org/DeepSWE-Preview", + "moonshotai/Kimi-K2-Instruct-tools", + "internlm/Intern-S1", + "TheDrummer/Gemmasutra-Pro-27B-v1.1", + "all-hands/openhands-lm-32b-v0.1-ep3", + "TheDrummer/Tunguska-39B-v1", + "Meridian", + "Zenith", + "Proxima", + "agentica-org/DeepCoder-14B-Preview", + "TheDrummer/Cydonia-24B-v2.1", + "Tesslate/UIGEN-X-32B-0727", + "NousResearch/Hermes-4-14B", + "unsloth/gemma-3-4b-it", + "tencent/Hunyuan-A13B-Instruct", + "unsloth/Llama-3.2-3B-Instruct", + "unsloth/Llama-3.2-1B-Instruct", + "zai-org/GLM-4.5-turbo", + "zai-org/GLM-4.6-turbo", + "rednote-hilab/dots.ocr", + "deepseek-ai/DeepSeek-V3-0324-turbo", + "deepseek-ai/DeepSeek-V3.1-turbo", + "moonshotai/Kimi-K2-Thinking", + "zai-org/GLM-4.5", + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Chutes", + "owners": ["{}"], + "slug": "chutes", + "statusPageUrl": null + }, + "provider_model_id": "deepseek-ai/DeepSeek-R1-0528-TEE", + "provider_name": "Chutes", + "provider_region": null, + "provider_slug": "chutes/fp8", + "quantization": "fp8", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "seed", + "top_k", + "repetition_penalty", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "deepseek-r1", + "model_version_group_id": null, + "name": "DeepSeek: R1 0528", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-r1-0528", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", + "warning_message": null + }, { "author": "deepseek", "context_length": 131072, @@ -27097,12 +28372,8 @@ "pricing": { "completion": "0.00000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000015", + "prompt": "0.00000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -27149,7 +28420,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -27177,13 +28454,11 @@ "top_k", "repetition_penalty", "response_format", - "structured_outputs", - "tools", - "tool_choice" + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -27288,12 +28563,8 @@ "pricing": { "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000015", + "prompt": "0.00000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -27340,7 +28611,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -27394,7 +28671,7 @@ }, { "author": "google", - "context_length": 96000, + "context_length": 128000, "created_at": "2025-03-12T05:12:39.645813+00:00", "default_parameters": { "frequency_penalty": null, @@ -27407,7 +28684,7 @@ "endpoint": { "adapter_name": "ChutesAdapter", "can_abort": true, - "context_length": 96000, + "context_length": 128000, "data_policy": { "canPublish": false, "retainsPrompts": true, @@ -27433,7 +28710,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 96000, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -27485,12 +28762,8 @@ "pricing": { "completion": "0.00000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000002", + "prompt": "0.00000004" }, "provider_display_name": "Chutes", "provider_info": { @@ -27537,7 +28810,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -27677,12 +28956,7 @@ "pricing": { "completion": "0.0000000681536", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001703012", - "request": "0", - "web_search": "0" + "prompt": "0.00000001703012" }, "provider_display_name": "Chutes", "provider_info": { @@ -27729,7 +29003,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -27877,12 +29157,8 @@ "pricing": { "completion": "0.00000112", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000135", + "prompt": "0.00000027" }, "provider_display_name": "Chutes", "provider_info": { @@ -27929,7 +29205,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -27956,10 +29238,10 @@ "seed", "top_k", "repetition_penalty", + "response_format", "tools", "tool_choice", - "structured_outputs", - "response_format" + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -28082,7 +29364,7 @@ "router": null, "short_name": "Devstral 2 2512", "slug": "mistralai/devstral-2512", - "updated_at": "2025-12-09T16:24:38.243423+00:00", + "updated_at": "2026-01-26T16:44:50.583818+00:00", "warning_message": null }, "model_variant_permaslug": "mistralai/devstral-2512", @@ -28092,12 +29374,8 @@ "pricing": { "completion": "0.00000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000025", + "prompt": "0.00000005" }, "provider_display_name": "Chutes", "provider_info": { @@ -28144,7 +29422,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -28154,7 +29438,7 @@ "slug": "chutes", "statusPageUrl": null }, - "provider_model_id": "mistralai/Devstral-2-123B-Instruct-2512", + "provider_model_id": "mistralai/Devstral-2-123B-Instruct-2512-TEE", "provider_name": "Chutes", "provider_region": null, "provider_slug": "chutes/fp8", @@ -28207,7 +29491,7 @@ "router": null, "short_name": "Devstral 2 2512", "slug": "mistralai/devstral-2512", - "updated_at": "2025-12-09T16:24:38.243423+00:00", + "updated_at": "2026-01-26T16:44:50.583818+00:00", "warning_message": null }, { @@ -28288,12 +29572,8 @@ "pricing": { "completion": "0.00000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000001", + "prompt": "0.00000002" }, "provider_display_name": "Chutes", "provider_info": { @@ -28340,7 +29620,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -28483,15 +29769,220 @@ "model_variant_slug": "mistralai/mistral-small-24b-instruct-2501", "moderation_required": false, "name": "Chutes | mistralai/mistral-small-24b-instruct-2501", + "pricing": { + "completion": "0.0000003", + "discount": 0, + "input_cache_read": "0.000000035", + "prompt": "0.00000007" + }, + "provider_display_name": "Chutes", + "provider_info": { + "adapterName": "ChutesAdapter", + "baseUrl": "https://llm.chutes.ai/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "termsOfServiceURL": "https://chutes.ai/tos", + "training": true + }, + "displayName": "Chutes", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://chutes.ai/&size=256" + }, + "ignoredProviderModels": [ + "openbmb/MiniCPM4-8B", + "agentica-org/DeepSWE-Preview", + "moonshotai/Kimi-K2-Instruct-tools", + "internlm/Intern-S1", + "TheDrummer/Gemmasutra-Pro-27B-v1.1", + "all-hands/openhands-lm-32b-v0.1-ep3", + "TheDrummer/Tunguska-39B-v1", + "Meridian", + "Zenith", + "Proxima", + "agentica-org/DeepCoder-14B-Preview", + "TheDrummer/Cydonia-24B-v2.1", + "Tesslate/UIGEN-X-32B-0727", + "NousResearch/Hermes-4-14B", + "unsloth/gemma-3-4b-it", + "tencent/Hunyuan-A13B-Instruct", + "unsloth/Llama-3.2-3B-Instruct", + "unsloth/Llama-3.2-1B-Instruct", + "zai-org/GLM-4.5-turbo", + "zai-org/GLM-4.6-turbo", + "rednote-hilab/dots.ocr", + "deepseek-ai/DeepSeek-V3-0324-turbo", + "deepseek-ai/DeepSeek-V3.1-turbo", + "moonshotai/Kimi-K2-Thinking", + "zai-org/GLM-4.5", + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Chutes", + "owners": ["{}"], + "slug": "chutes", + "statusPageUrl": null + }, + "provider_model_id": "unsloth/Mistral-Small-24B-Instruct-2501", + "provider_name": "Chutes", + "provider_region": null, + "provider_slug": "chutes/bf16", + "quantization": "bf16", + "supported_parameters": [ + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "seed", + "top_k", + "repetition_penalty", + "response_format", + "structured_outputs", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", + "has_text_output": true, + "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Mistral: Mistral Small 3", + "output_modalities": ["text"], + "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, + "router": null, + "short_name": "Mistral Small 3", + "slug": "mistralai/mistral-small-24b-instruct-2501", + "updated_at": "2025-12-16T18:22:59.07006+00:00", + "warning_message": null + }, + { + "author": "mistralai", + "context_length": 131072, + "created_at": "2025-03-17T19:15:37.00423+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], + "default_system": null, + "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", + "endpoint": { + "adapter_name": "ChutesAdapter", + "can_abort": true, + "context_length": 131072, + "data_policy": { + "canPublish": false, + "retainsPrompts": true, + "termsOfServiceURL": "https://chutes.ai/tos", + "training": true + }, + "features": { + "supported_parameters": {}, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "5f981bd9-b098-4757-b075-06d51d37cc65", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 131072, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "mistralai", + "context_length": 128000, + "created_at": "2025-03-17T19:15:37.00423+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], + "default_system": null, + "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", + "has_text_output": true, + "hf_slug": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Mistral: Mistral Small 3.1 24B", + "output_modalities": ["text"], + "permaslug": "mistralai/mistral-small-3.1-24b-instruct-2503", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, + "router": null, + "short_name": "Mistral Small 3.1 24B", + "slug": "mistralai/mistral-small-3.1-24b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "mistralai/mistral-small-3.1-24b-instruct-2503", + "model_variant_slug": "mistralai/mistral-small-3.1-24b-instruct", + "moderation_required": false, + "name": "Chutes | mistralai/mistral-small-3.1-24b-instruct-2503", "pricing": { "completion": "0.00000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000015", + "prompt": "0.00000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -28538,7 +30029,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -28548,7 +30045,7 @@ "slug": "chutes", "statusPageUrl": null }, - "provider_model_id": "unsloth/Mistral-Small-24B-Instruct-2501", + "provider_model_id": "chutesai/Mistral-Small-3.1-24B-Instruct-2503", "provider_name": "Chutes", "provider_region": null, "provider_slug": "chutes/bf16", @@ -28564,9 +30061,9 @@ "top_k", "repetition_penalty", "response_format", - "structured_outputs", "tools", - "tool_choice" + "tool_choice", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, @@ -28575,7 +30072,6 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -28584,36 +30080,36 @@ }, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", + "hf_slug": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3", + "name": "Mistral: Mistral Small 3.1 24B", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "permaslug": "mistralai/mistral-small-3.1-24b-instruct-2503", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Mistral Small 3", - "slug": "mistralai/mistral-small-24b-instruct-2501", - "updated_at": "2025-12-16T18:22:59.07006+00:00", + "short_name": "Mistral Small 3.1 24B", + "slug": "mistralai/mistral-small-3.1-24b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", "context_length": 131072, - "created_at": "2025-03-17T19:15:37.00423+00:00", + "created_at": "2025-06-20T18:10:16.960494+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", "endpoint": { "adapter_name": "ChutesAdapter", "can_abort": true, @@ -28625,7 +30121,9 @@ "training": true }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -28635,7 +30133,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5f981bd9-b098-4757-b075-06d51d37cc65", + "id": "a19c2a4a-f3dc-451f-ae9f-07ae51a6b234", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -28650,55 +30148,41 @@ "model": { "author": "mistralai", "context_length": 128000, - "created_at": "2025-03-17T19:15:37.00423+00:00", + "created_at": "2025-06-20T18:10:16.960494+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", + "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.1 24B", + "name": "Mistral: Mistral Small 3.2 24B", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.1-24b-instruct-2503", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "reasoning_config": null, "router": null, - "short_name": "Mistral Small 3.1 24B", - "slug": "mistralai/mistral-small-3.1-24b-instruct", + "short_name": "Mistral Small 3.2 24B", + "slug": "mistralai/mistral-small-3.2-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-small-3.1-24b-instruct-2503", - "model_variant_slug": "mistralai/mistral-small-3.1-24b-instruct", + "model_variant_permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "model_variant_slug": "mistralai/mistral-small-3.2-24b-instruct", "moderation_required": false, - "name": "Chutes | mistralai/mistral-small-3.1-24b-instruct-2503", + "name": "Chutes | mistralai/mistral-small-3.2-24b-instruct-2506", "pricing": { - "completion": "0.00000011", + "completion": "0.00000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000003", + "prompt": "0.00000006" }, "provider_display_name": "Chutes", "provider_info": { @@ -28745,7 +30229,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -28755,12 +30245,13 @@ "slug": "chutes", "statusPageUrl": null }, - "provider_model_id": "chutesai/Mistral-Small-3.1-24B-Instruct-2503", + "provider_model_id": "chutesai/Mistral-Small-3.2-24B-Instruct-2506", "provider_name": "Chutes", "provider_region": null, "provider_slug": "chutes/bf16", "quantization": "bf16", "supported_parameters": [ + "structured_outputs", "max_tokens", "temperature", "top_p", @@ -28772,8 +30263,7 @@ "repetition_penalty", "response_format", "tools", - "tool_choice", - "structured_outputs" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, @@ -28781,49 +30271,37 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", + "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.1 24B", + "name": "Mistral: Mistral Small 3.2 24B", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.1-24b-instruct-2503", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "reasoning_config": null, "router": null, - "short_name": "Mistral Small 3.1 24B", - "slug": "mistralai/mistral-small-3.1-24b-instruct", + "short_name": "Mistral Small 3.2 24B", + "slug": "mistralai/mistral-small-3.2-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 131072, - "created_at": "2025-06-20T18:10:16.960494+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { "adapter_name": "ChutesAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "retainsPrompts": true, @@ -28832,8 +30310,9 @@ }, "features": { "supported_parameters": { - "structured_outputs": true + "response_format": true }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -28843,7 +30322,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "a19c2a4a-f3dc-451f-ae9f-07ae51a6b234", + "id": "bf206ca2-7905-4c08-a30c-c1be7a45d4d4", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -28852,51 +30331,55 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 128000, - "created_at": "2025-06-20T18:10:16.960494+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", - "features": {}, - "group": "Mistral", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.2 24B", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-0905", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mistral Small 3.2 24B", - "slug": "mistralai/mistral-small-3.2-24b-instruct", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "model_variant_slug": "mistralai/mistral-small-3.2-24b-instruct", + "model_variant_permaslug": "moonshotai/kimi-k2-0905", + "model_variant_slug": "moonshotai/kimi-k2-0905", "moderation_required": false, - "name": "Chutes | mistralai/mistral-small-3.2-24b-instruct-2506", + "name": "Chutes | moonshotai/kimi-k2-0905", "pricing": { - "completion": "0.00000018", + "completion": "0.0000019", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000195", + "prompt": "0.00000039" }, "provider_display_name": "Chutes", "provider_info": { @@ -28943,7 +30426,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -28953,13 +30442,13 @@ "slug": "chutes", "statusPageUrl": null }, - "provider_model_id": "chutesai/Mistral-Small-3.2-24B-Instruct-2506", + "provider_model_id": "moonshotai/Kimi-K2-Instruct-0905", "provider_name": "Chutes", "provider_region": null, - "provider_slug": "chutes/bf16", - "quantization": "bf16", + "provider_slug": "chutes/fp8", + "quantization": "fp8", "supported_parameters": [ - "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -28969,7 +30458,7 @@ "seed", "top_k", "repetition_penalty", - "response_format", + "structured_outputs", "tools", "tool_choice" ], @@ -28979,33 +30468,47 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.2 24B", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-0905", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mistral Small 3.2 24B", - "slug": "mistralai/mistral-small-3.2-24b-instruct", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "moonshotai", "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { "adapter_name": "ChutesAdapter", "can_abort": true, @@ -29017,10 +30520,8 @@ "training": true }, "features": { - "supported_parameters": { - "response_format": true - }, - "supports_input_audio": false, + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -29030,7 +30531,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "bf206ca2-7905-4c08-a30c-c1be7a45d4d4", + "id": "5389e662-1ebc-49cf-b761-aa379ba5b381", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -29039,59 +30540,60 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 65535, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "moonshotai", "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "Chutes | moonshotai/kimi-k2-0905", + "name": "Chutes | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.0000019", + "completion": "0.00000175", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000039", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000002", + "prompt": "0.0000004" }, "provider_display_name": "Chutes", "provider_info": { @@ -29138,7 +30640,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -29148,13 +30656,14 @@ "slug": "chutes", "statusPageUrl": null }, - "provider_model_id": "moonshotai/Kimi-K2-Instruct-0905", + "provider_model_id": "moonshotai/Kimi-K2-Thinking-TEE", "provider_name": "Chutes", "provider_region": null, - "provider_slug": "chutes/fp8", - "quantization": "fp8", + "provider_slug": "chutes/int4", + "quantization": "int4", "supported_parameters": [ - "response_format", + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -29164,49 +30673,51 @@ "seed", "top_k", "repetition_penalty", + "response_format", "structured_outputs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "moonshotai", "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -29214,7 +30725,7 @@ }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { "adapter_name": "ChutesAdapter", "can_abort": true, @@ -29226,7 +30737,6 @@ "training": true }, "features": { - "is_mandatory_reasoning": true, "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, @@ -29237,7 +30747,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5389e662-1ebc-49cf-b761-aa379ba5b381", + "id": "b2debbe6-9b98-4db0-9cc7-6f172152d210", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -29252,7 +30762,7 @@ "model": { "author": "moonshotai", "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -29260,50 +30770,44 @@ }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "Chutes | moonshotai/kimi-k2-thinking-20251106", + "name": "Chutes | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.00000175", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001125", + "prompt": "0.00000045" }, "provider_display_name": "Chutes", "provider_info": { @@ -29350,7 +30854,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -29360,7 +30870,7 @@ "slug": "chutes", "statusPageUrl": null }, - "provider_model_id": "moonshotai/Kimi-K2-Thinking-TEE", + "provider_model_id": "moonshotai/Kimi-K2.5-TEE", "provider_name": "Chutes", "provider_region": null, "provider_slug": "chutes/int4", @@ -29377,10 +30887,10 @@ "seed", "top_k", "repetition_penalty", + "response_format", "structured_outputs", "tools", - "tool_choice", - "response_format" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, @@ -29391,31 +30901,29 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { @@ -29493,12 +31001,8 @@ "pricing": { "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000001", + "prompt": "0.00000002" }, "provider_display_name": "Chutes", "provider_info": { @@ -29545,7 +31049,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -29686,12 +31196,8 @@ "pricing": { "completion": "0.00000038", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000011", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000055", + "prompt": "0.00000011" }, "provider_display_name": "Chutes", "provider_info": { @@ -29738,7 +31244,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -29899,12 +31411,8 @@ "pricing": { "completion": "0.00000024", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000003", + "prompt": "0.00000006" }, "provider_display_name": "Chutes", "provider_info": { @@ -29951,7 +31459,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -29978,10 +31492,10 @@ "seed", "top_k", "repetition_penalty", - "tool_choice", + "response_format", "tools", - "structured_outputs", - "response_format" + "tool_choice", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -30112,12 +31626,8 @@ "pricing": { "completion": "0.00000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000002", + "prompt": "0.00000004" }, "provider_display_name": "Chutes", "provider_info": { @@ -30164,7 +31674,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -30330,14 +31846,10 @@ "moderation_required": false, "name": "Chutes | openai/gpt-oss-20b", "pricing": { - "completion": "0.0000001", + "completion": "0.00000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000002", + "prompt": "0.00000004" }, "provider_display_name": "Chutes", "provider_info": { @@ -30384,7 +31896,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -30536,14 +32054,10 @@ "moderation_required": false, "name": "Chutes | opengvlab/internvl3-78b", "pricing": { - "completion": "0.00000039", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000075", + "prompt": "0.00000015" }, "provider_display_name": "Chutes", "provider_info": { @@ -30590,7 +32104,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -30728,12 +32248,8 @@ "pricing": { "completion": "0.00000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000025", + "prompt": "0.00000005" }, "provider_display_name": "Chutes", "provider_info": { @@ -30780,7 +32296,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -30907,12 +32429,8 @@ "pricing": { "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000075", + "prompt": "0.00000015" }, "provider_display_name": "Chutes", "provider_info": { @@ -30959,7 +32477,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -31095,12 +32619,8 @@ "pricing": { "completion": "0.00000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000025", + "prompt": "0.00000005" }, "provider_display_name": "Chutes", "provider_info": { @@ -31147,7 +32667,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -31295,12 +32821,8 @@ "pricing": { "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.0000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -31347,7 +32869,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -31496,12 +33024,8 @@ "pricing": { "completion": "0.00000055", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000004", + "prompt": "0.00000008" }, "provider_display_name": "Chutes", "provider_info": { @@ -31548,7 +33072,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -31707,12 +33237,8 @@ "pricing": { "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000011", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000055", + "prompt": "0.00000011" }, "provider_display_name": "Chutes", "provider_info": { @@ -31759,7 +33285,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -31921,12 +33453,8 @@ "pricing": { "completion": "0.00000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000003", + "prompt": "0.00000006" }, "provider_display_name": "Chutes", "provider_info": { @@ -31973,7 +33501,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -32125,12 +33659,8 @@ "pricing": { "completion": "0.00000033", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000004", + "prompt": "0.00000008" }, "provider_display_name": "Chutes", "provider_info": { @@ -32177,7 +33707,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -32325,12 +33861,8 @@ "pricing": { "completion": "0.00000024", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000004", + "prompt": "0.00000008" }, "provider_display_name": "Chutes", "provider_info": { @@ -32377,7 +33909,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -32445,11 +33983,15 @@ { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-23T00:29:06+00:00", - "default_parameters": {}, + "created_at": "2026-02-04T00:15:01.820167+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment.\n\nThe model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.", "endpoint": { "adapter_name": "ChutesAdapter", "can_abort": true, @@ -32461,20 +34003,16 @@ "training": true }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "14d6aeff-9fe4-4ad4-aaf4-a58b7fdd7a19", + "id": "b0bd131e-0f14-43f7-b5ce-3e7636a86a3b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -32483,59 +34021,58 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 1048576, - "created_at": "2025-07-23T00:29:06+00:00", - "default_parameters": {}, + "context_length": 262144, + "created_at": "2026-02-04T00:15:01.820167+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment.\n\nThe model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, - "group": "Qwen3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-Next", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen: Qwen3 Coder Next", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "qwen/qwen3-coder-next-2025-02-03", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Coder Next", + "slug": "qwen/qwen3-coder-next", + "updated_at": "2026-02-04T00:27:00.409072+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "model_variant_slug": "qwen/qwen3-coder", + "model_variant_permaslug": "qwen/qwen3-coder-next-2025-02-03", + "model_variant_slug": "qwen/qwen3-coder-next", "moderation_required": false, - "name": "Chutes | qwen/qwen3-coder-480b-a35b-07-25", + "name": "Chutes | qwen/qwen3-coder-next-2025-02-03", "pricing": { - "completion": "0.00000095", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000022", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000035", + "prompt": "0.00000007" }, "provider_display_name": "Chutes", "provider_info": { @@ -32582,7 +34119,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -32592,14 +34135,12 @@ "slug": "chutes", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8-TEE", + "provider_model_id": "Qwen/Qwen3-Coder-Next", "provider_name": "Chutes", "provider_region": null, "provider_slug": "chutes/bf16", "quantization": "bf16", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -32609,6 +34150,8 @@ "seed", "top_k", "repetition_penalty", + "response_format", + "structured_outputs", "tools", "tool_choice" ], @@ -32619,32 +34162,31 @@ "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, - "group": "Qwen3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-Next", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen: Qwen3 Coder Next", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "qwen/qwen3-coder-next-2025-02-03", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Coder Next", + "slug": "qwen/qwen3-coder-next", + "updated_at": "2026-02-04T00:27:00.409072+00:00", "warning_message": null }, { @@ -32731,12 +34273,8 @@ "pricing": { "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000005", + "prompt": "0.0000001" }, "provider_display_name": "Chutes", "provider_info": { @@ -32783,7 +34321,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -32943,12 +34487,8 @@ "pricing": { "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.0000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -32995,7 +34535,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -33131,14 +34677,10 @@ "moderation_required": false, "name": "Chutes | qwen/qwen-2.5-72b-instruct", "pricing": { - "completion": "0.00000052", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000013", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.0000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -33185,7 +34727,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -33314,12 +34862,8 @@ "pricing": { "completion": "0.00000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000015", + "prompt": "0.00000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -33366,7 +34910,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -33502,12 +35052,8 @@ "pricing": { "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.0000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -33554,7 +35100,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -33702,12 +35254,8 @@ "pricing": { "completion": "0.00000085", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000125", + "prompt": "0.00000025" }, "provider_display_name": "Chutes", "provider_info": { @@ -33754,7 +35302,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -33915,12 +35469,8 @@ "pricing": { "completion": "0.00000085", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000125", + "prompt": "0.00000025" }, "provider_display_name": "Chutes", "provider_info": { @@ -33967,7 +35517,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -34035,6 +35591,222 @@ "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, + { + "author": "xiaomi", + "context_length": 262144, + "created_at": "2025-12-14T16:55:08+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": 0.95 + }, + "default_stops": [], + "default_system": "You are MiMo, an AI assistant developed by Xiaomi.\n\nYour knowledge cutoff date is December 2024.", + "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config).", + "endpoint": { + "adapter_name": "ChutesAdapter", + "can_abort": true, + "context_length": 262144, + "data_policy": { + "canPublish": false, + "retainsPrompts": true, + "termsOfServiceURL": "https://chutes.ai/tos", + "training": true + }, + "features": { + "reasoning_return_mechanism": "reasoning-content", + "supports_tool_choice": { + "literal_auto": true, + "literal_none": false, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "87ba9462-69a4-4c37-a009-60a513f0d901", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "xiaomi", + "context_length": 262144, + "created_at": "2025-12-14T16:55:08+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": 0.95 + }, + "default_stops": [], + "default_system": "You are MiMo, an AI assistant developed by Xiaomi.\n\nYour knowledge cutoff date is December 2024.", + "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config).", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "XiaomiMiMo/MiMo-V2-Flash", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Xiaomi: MiMo-V2-Flash", + "output_modalities": ["text"], + "permaslug": "xiaomi/mimo-v2-flash-20251210", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "MiMo-V2-Flash", + "slug": "xiaomi/mimo-v2-flash", + "updated_at": "2026-01-21T16:26:04.702272+00:00", + "warning_message": null + }, + "model_variant_permaslug": "xiaomi/mimo-v2-flash-20251210", + "model_variant_slug": "xiaomi/mimo-v2-flash", + "moderation_required": false, + "name": "Chutes | xiaomi/mimo-v2-flash-20251210", + "pricing": { + "completion": "0.00000029", + "discount": 0, + "input_cache_read": "0.000000045", + "prompt": "0.00000009" + }, + "provider_display_name": "Chutes", + "provider_info": { + "adapterName": "ChutesAdapter", + "baseUrl": "https://llm.chutes.ai/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "termsOfServiceURL": "https://chutes.ai/tos", + "training": true + }, + "displayName": "Chutes", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://chutes.ai/&size=256" + }, + "ignoredProviderModels": [ + "openbmb/MiniCPM4-8B", + "agentica-org/DeepSWE-Preview", + "moonshotai/Kimi-K2-Instruct-tools", + "internlm/Intern-S1", + "TheDrummer/Gemmasutra-Pro-27B-v1.1", + "all-hands/openhands-lm-32b-v0.1-ep3", + "TheDrummer/Tunguska-39B-v1", + "Meridian", + "Zenith", + "Proxima", + "agentica-org/DeepCoder-14B-Preview", + "TheDrummer/Cydonia-24B-v2.1", + "Tesslate/UIGEN-X-32B-0727", + "NousResearch/Hermes-4-14B", + "unsloth/gemma-3-4b-it", + "tencent/Hunyuan-A13B-Instruct", + "unsloth/Llama-3.2-3B-Instruct", + "unsloth/Llama-3.2-1B-Instruct", + "zai-org/GLM-4.5-turbo", + "zai-org/GLM-4.6-turbo", + "rednote-hilab/dots.ocr", + "deepseek-ai/DeepSeek-V3-0324-turbo", + "deepseek-ai/DeepSeek-V3.1-turbo", + "moonshotai/Kimi-K2-Thinking", + "zai-org/GLM-4.5", + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Chutes", + "owners": ["{}"], + "slug": "chutes", + "statusPageUrl": null + }, + "provider_model_id": "XiaomiMiMo/MiMo-V2-Flash", + "provider_name": "Chutes", + "provider_region": null, + "provider_slug": "chutes", + "quantization": "unknown", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "seed", + "top_k", + "repetition_penalty", + "tools", + "tool_choice", + "structured_outputs", + "response_format" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "XiaomiMiMo/MiMo-V2-Flash", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Xiaomi: MiMo-V2-Flash", + "output_modalities": ["text"], + "permaslug": "xiaomi/mimo-v2-flash-20251210", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "MiMo-V2-Flash", + "slug": "xiaomi/mimo-v2-flash", + "updated_at": "2026-01-21T16:26:04.702272+00:00", + "warning_message": null + }, { "author": "z-ai", "context_length": 131072, @@ -34058,6 +35830,7 @@ "training": true }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -34128,12 +35901,8 @@ "pricing": { "completion": "0.00000155", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000035", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000175", + "prompt": "0.00000035" }, "provider_display_name": "Chutes", "provider_info": { @@ -34180,7 +35949,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -34271,6 +36046,7 @@ "training": true }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -34338,14 +36114,10 @@ "moderation_required": false, "name": "Chutes | z-ai/glm-4.5-air", "pricing": { - "completion": "0.00000022", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000075", + "prompt": "0.00000015" }, "provider_display_name": "Chutes", "provider_info": { @@ -34392,7 +36164,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -34482,6 +36260,7 @@ "training": true }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -34552,12 +36331,8 @@ "pricing": { "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000035", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000175", + "prompt": "0.00000035" }, "provider_display_name": "Chutes", "provider_info": { @@ -34604,7 +36379,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -34632,9 +36413,9 @@ "top_k", "repetition_penalty", "response_format", - "structured_outputs", "tools", - "tool_choice" + "tool_choice", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -34695,6 +36476,7 @@ "training": true }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_base64_video_input": false, "supports_tool_choice": { "literal_auto": true, @@ -34767,12 +36549,8 @@ "pricing": { "completion": "0.0000009", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.0000003" }, "provider_display_name": "Chutes", "provider_info": { @@ -34819,7 +36597,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -34846,10 +36630,10 @@ "seed", "top_k", "repetition_penalty", + "response_format", "tools", "tool_choice", - "structured_outputs", - "response_format" + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -34981,12 +36765,8 @@ "pricing": { "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000002", + "prompt": "0.0000004" }, "provider_display_name": "Chutes", "provider_info": { @@ -35033,7 +36813,13 @@ "deepseek-ai/DeepSeek-V3.1-turbo", "moonshotai/Kimi-K2-Thinking", "zai-org/GLM-4.5", - "deepseek-ai/DeepSeek-V3.1" + "deepseek-ai/DeepSeek-V3.1", + "zai-org/GLM-4.6-FP8", + "zai-org/GLM-4.7-FP8", + "zai-org/GLM-4.5-FP8", + "tngtech/TNG-R1T-Chimera-Turbo", + "miromind-ai/MiroThinker-v1.5-235B", + "Qwen/Qwen3Guard-Gen-0.6B" ], "isAbortable": true, "isMultipartSupported": true, @@ -35060,10 +36846,10 @@ "seed", "top_k", "repetition_penalty", - "tools", - "tool_choice", + "response_format", "structured_outputs", - "response_format" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, @@ -35192,12 +36978,7 @@ "pricing": { "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000005" }, "provider_display_name": "Cirrascale", "provider_info": { @@ -35228,7 +37009,9 @@ "olmOCR-2-7B-1025", "Olmo-3-7B-Think", "Olmo-3-32B-Think", - "Olmo-3-7B-Instruct" + "Olmo-3-7B-Instruct", + "Olmo-3.1-32B-Instruct", + "Olmo-3.1-32B-Think" ], "isAbortable": true, "isMultipartSupported": true, @@ -35382,13 +37165,7 @@ "pricing": { "completion": "0.00000015", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.000000045", - "request": "0", - "web_search": "0" + "prompt": "0.000000045" }, "provider_display_name": "Clarifai", "provider_info": { @@ -35574,13 +37351,7 @@ "pricing": { "completion": "0.00000036", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000009" }, "provider_display_name": "Clarifai", "provider_info": { @@ -35770,13 +37541,7 @@ "pricing": { "completion": "0.00000018", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.000000045", - "request": "0", - "web_search": "0" + "prompt": "0.000000045" }, "provider_display_name": "Clarifai", "provider_info": { @@ -35970,12 +37735,7 @@ "pricing": { "completion": "0.00000488", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -36142,12 +37902,7 @@ "pricing": { "completion": "0.00000056", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000035", - "request": "0", - "web_search": "0" + "prompt": "0.00000035" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -36325,12 +38080,7 @@ "pricing": { "completion": "0.00000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000017", - "request": "0", - "web_search": "0" + "prompt": "0.000000017" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -36497,12 +38247,7 @@ "pricing": { "completion": "0.00000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000048", - "request": "0", - "web_search": "0" + "prompt": "0.00000048" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -36659,12 +38404,7 @@ "pricing": { "completion": "0.00000083", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000028", - "request": "0", - "web_search": "0" + "prompt": "0.00000028" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -36821,12 +38561,7 @@ "pricing": { "completion": "0.00000029", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -36985,12 +38720,7 @@ "pricing": { "completion": "0.00000068", "discount": 0, - "image": "0.001281", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000049", - "request": "0", - "web_search": "0" + "prompt": "0.000000049" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -37147,12 +38877,7 @@ "pricing": { "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000027", - "request": "0", - "web_search": "0" + "prompt": "0.000000027" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -37309,12 +39034,7 @@ "pricing": { "completion": "0.00000034", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000051", - "request": "0", - "web_search": "0" + "prompt": "0.000000051" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -37471,12 +39191,7 @@ "pricing": { "completion": "0.00000225", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000029", - "request": "0", - "web_search": "0" + "prompt": "0.00000029" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -37637,12 +39352,7 @@ "pricing": { "completion": "0.00000019", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000011", - "request": "0", - "web_search": "0" + "prompt": "0.00000011" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -37813,12 +39523,7 @@ "pricing": { "completion": "0.00000056", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000035", - "request": "0", - "web_search": "0" + "prompt": "0.00000035" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -37995,12 +39700,7 @@ "pricing": { "completion": "0.00000034", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000051", - "request": "0", - "web_search": "0" + "prompt": "0.000000051" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -38169,12 +39869,7 @@ "pricing": { "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000066", - "request": "0", - "web_search": "0" + "prompt": "0.00000066" }, "provider_display_name": "Cloudflare", "provider_info": { @@ -38346,12 +40041,7 @@ "pricing": { "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0", - "web_search": "0" + "prompt": "0.0000025" }, "provider_display_name": "Cohere", "provider_info": { @@ -38500,12 +40190,7 @@ "pricing": { "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Cohere", "provider_info": { @@ -38656,12 +40341,7 @@ "pricing": { "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0", - "web_search": "0" + "prompt": "0.0000025" }, "provider_display_name": "Cohere", "provider_info": { @@ -38811,12 +40491,7 @@ "pricing": { "completion": "0.00000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000000375", - "request": "0", - "web_search": "0" + "prompt": "0.0000000375" }, "provider_display_name": "Cohere", "provider_info": { @@ -38914,7 +40589,7 @@ "default_parameters": null, "default_stops": [], "default_system": null, - "description": "Corethink - AI that reasons through problems instead of guessing. Available free of charge in Kilo for a limited time.", + "description": "CoreThink - AI that reasons through problems instead of guessing. Available free of charge in Kilo for a limited time.", "endpoint": { "adapter_name": "other", "can_abort": true, @@ -38947,7 +40622,7 @@ "default_parameters": null, "default_stops": [], "default_system": null, - "description": "Corethink - AI that reasons through problems instead of guessing. Available free of charge in Kilo for a limited time.", + "description": "CoreThink - AI that reasons through problems instead of guessing. Available free of charge in Kilo for a limited time.", "features": null, "group": "other", "has_text_output": true, @@ -38964,7 +40639,7 @@ "router": null, "short_name": "CoreThink (free)", "slug": "corethink:free", - "updated_at": "2026-01-13T12:11:27.631Z", + "updated_at": "2026-02-10T10:49:03.904Z", "warning_message": null }, "model_variant_permaslug": "corethink:free", @@ -39037,7 +40712,7 @@ "router": null, "short_name": "CoreThink (free)", "slug": "corethink", - "updated_at": "2026-01-13T12:11:27.631Z", + "updated_at": "2026-02-10T10:49:03.904Z", "warning_message": null } ], @@ -39131,13 +40806,8 @@ "pricing": { "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000025", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, "provider_display_name": "Crusoe", "provider_info": { @@ -39306,13 +40976,8 @@ "pricing": { "completion": "0.0000054", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000068", - "internal_reasoning": "0", - "prompt": "0.00000135", - "request": "0", - "web_search": "0" + "prompt": "0.00000135" }, "provider_display_name": "Crusoe", "provider_info": { @@ -39480,13 +41145,8 @@ "pricing": { "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000004", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "prompt": "0.00000008" }, "provider_display_name": "Crusoe", "provider_info": { @@ -39641,13 +41301,8 @@ "pricing": { "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000013", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, "provider_display_name": "Crusoe", "provider_info": { @@ -39826,13 +41481,8 @@ "pricing": { "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000008", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Crusoe", "provider_info": { @@ -39886,13 +41536,11 @@ "repetition_penalty", "top_k", "logit_bias", - "tools", - "tool_choice", "reasoning_effort" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -40011,13 +41659,8 @@ "pricing": { "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000011", - "internal_reasoning": "0", - "prompt": "0.00000022", - "request": "0", - "web_search": "0" + "prompt": "0.00000022" }, "provider_display_name": "Crusoe", "provider_info": { @@ -40217,12 +41860,7 @@ "pricing": { "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -40307,13 +41945,18 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, @@ -40470,12 +42113,7 @@ "pricing": { "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000005", - "request": "0", - "web_search": "0" + "prompt": "0.000000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -40560,13 +42198,18 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, @@ -40718,12 +42361,7 @@ "pricing": { "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000001" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -40808,13 +42446,18 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, @@ -40966,12 +42609,7 @@ "pricing": { "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000001" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -41056,13 +42694,18 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, @@ -41121,13 +42764,13 @@ "warning_message": null }, { - "author": "deepseek", + "author": "deepseek-ai", "context_length": 163840, - "created_at": "2025-04-30T11:38:14.302503+00:00", + "created_at": "2024-12-26T19:28:40.559917+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics. Likely an upgrade from [DeepSeek-Prover-V1.5](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V1.5-RL) Not much is known about the model yet, as DeepSeek released it on Hugging Face without an announcement or description.", + "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, @@ -41144,13 +42787,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "1dd315ed-3fb8-4015-beeb-9a6eb0dd5a03", + "id": "5294d55f-9012-496b-8f22-8cc919432dcd", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -41159,49 +42802,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 163840, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-04-30T11:38:14.302503+00:00", + "author": "deepseek-ai", + "context_length": 131072, + "created_at": "2024-12-26T19:28:40.559917+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek Prover V2 is a 671B parameter model, speculated to be geared towards logic and mathematics. Likely an upgrade from [DeepSeek-Prover-V1.5](https://huggingface.co/deepseek-ai/DeepSeek-Prover-V1.5-RL) Not much is known about the model yet, as DeepSeek released it on Hugging Face without an announcement or description.", + "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", "features": {}, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-Prover-V2-671B", + "hf_slug": "deepseek-ai/DeepSeek-V3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek Prover V2", + "name": "DeepSeek: DeepSeek V3", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-prover-v2", + "permaslug": "deepseek/deepseek-chat-v3", "reasoning_config": null, "router": null, - "short_name": "DeepSeek Prover V2", - "slug": "deepseek/deepseek-prover-v2", + "short_name": "DeepSeek V3", + "slug": "deepseek/deepseek-chat", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-prover-v2", - "model_variant_slug": "deepseek/deepseek-prover-v2", + "model_variant_permaslug": "deepseek/deepseek-chat-v3", + "model_variant_slug": "deepseek/deepseek-chat", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-prover-v2", + "name": "DeepInfra | deepseek/deepseek-chat-v3", "pricing": { - "completion": "0.00000218", + "completion": "0.00000089", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000032" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -41286,21 +42924,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-Prover-V2-671B", + "provider_model_id": "deepseek-ai/DeepSeek-V3", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/fp4", + "quantization": "fp4", "supported_parameters": [ "max_tokens", "temperature", @@ -41323,30 +42966,30 @@ "features": {}, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-Prover-V2-671B", + "hf_slug": "deepseek-ai/DeepSeek-V3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek Prover V2", + "name": "DeepSeek: DeepSeek V3", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-prover-v2", + "permaslug": "deepseek/deepseek-chat-v3", "reasoning_config": null, "router": null, - "short_name": "DeepSeek Prover V2", - "slug": "deepseek/deepseek-prover-v2", + "short_name": "DeepSeek V3", + "slug": "deepseek/deepseek-chat", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek-ai", + "author": "deepseek", "context_length": 163840, - "created_at": "2024-12-26T19:28:40.559917+00:00", + "created_at": "2025-03-24T13:59:15.252028+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, @@ -41369,7 +43012,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5294d55f-9012-496b-8f22-8cc919432dcd", + "id": "820376cb-f110-4d56-ab52-5bd6ca269420", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -41378,49 +43021,45 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 163840, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek-ai", + "author": "deepseek", "context_length": 131072, - "created_at": "2024-12-26T19:28:40.559917+00:00", + "created_at": "2025-03-24T13:59:15.252028+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "features": {}, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3", + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3", + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3", + "permaslug": "deepseek/deepseek-chat-v3-0324", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3", - "slug": "deepseek/deepseek-chat", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3", - "model_variant_slug": "deepseek/deepseek-chat", + "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", + "model_variant_slug": "deepseek/deepseek-chat-v3-0324", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-chat-v3", + "name": "DeepInfra | deepseek/deepseek-chat-v3-0324", "pricing": { - "completion": "0.00000089", + "completion": "0.00000077", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000032", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000135", + "prompt": "0.0000002" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -41505,17 +43144,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-V3", + "provider_model_id": "deepseek-ai/DeepSeek-V3-0324", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp4", @@ -41542,30 +43186,30 @@ "features": {}, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3", + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3", + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3", + "permaslug": "deepseek/deepseek-chat-v3-0324", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3", - "slug": "deepseek/deepseek-chat", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "deepseek", "context_length": 163840, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "created_at": "2025-08-21T12:33:48+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, @@ -41579,23 +43223,24 @@ }, "features": { "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "820376cb-f110-4d56-ab52-5bd6ca269420", + "id": "012f7d1f-23d4-4d03-9aa3-432d8ad82757", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 500, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, @@ -41603,44 +43248,49 @@ "model": { "author": "deepseek", "context_length": 131072, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "created_at": "2025-08-21T12:33:48+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", - "features": {}, + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "deepseek-ai/DeepSeek-V3.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "instruct_type": "deepseek-v3.1", + "model_version_group_id": null, + "name": "DeepSeek: DeepSeek V3.1", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-chat-v3.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", - "model_variant_slug": "deepseek/deepseek-chat-v3-0324", + "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", + "model_variant_slug": "deepseek/deepseek-chat-v3.1", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-chat-v3-0324", + "name": "DeepInfra | deepseek/deepseek-chat-v3.1", "pricing": { - "completion": "0.00000088", + "completion": "0.00000079", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000106", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001300000002", + "prompt": "0.00000021" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -41725,22 +43375,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-V3-0324", + "provider_model_id": "deepseek-ai/DeepSeek-V3.1", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp4", "quantization": "fp4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -41751,41 +43408,57 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "deepseek-ai/DeepSeek-V3.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "instruct_type": "deepseek-v3.1", + "model_version_group_id": null, + "name": "DeepSeek: DeepSeek V3.1", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-chat-v3.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "deepseek", "context_length": 163840, - "created_at": "2025-08-21T12:33:48+00:00", - "default_parameters": {}, + "created_at": "2025-09-22T13:37:55.611452+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, @@ -41798,8 +43471,6 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -41809,14 +43480,14 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "012f7d1f-23d4-4d03-9aa3-432d8ad82757", + "id": "bc1ebdd6-6789-4ac4-95ee-2a9a663f9a1d", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 500, + "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, @@ -41824,11 +43495,15 @@ "model": { "author": "deepseek", "context_length": 131072, - "created_at": "2025-08-21T12:33:48+00:00", - "default_parameters": {}, + "created_at": "2025-09-22T13:37:55.611452+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", "features": { "reasoning_config": { "end_token": "", @@ -41838,40 +43513,35 @@ }, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "deepseek-ai/DeepSeek-V3.1-Terminus", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "name": "DeepSeek: DeepSeek V3.1 Terminus", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", + "permaslug": "deepseek/deepseek-v3.1-terminus", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", + "short_name": "DeepSeek V3.1 Terminus", + "slug": "deepseek/deepseek-v3.1-terminus", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", - "model_variant_slug": "deepseek/deepseek-chat-v3.1", + "model_variant_permaslug": "deepseek/deepseek-v3.1-terminus", + "model_variant_slug": "deepseek/deepseek-v3.1-terminus", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-chat-v3.1", + "name": "DeepInfra | deepseek/deepseek-v3.1-terminus", "pricing": { "completion": "0.00000079", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000168", - "internal_reasoning": "0", - "prompt": "0.00000021", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001300000002", + "prompt": "0.00000021" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -41956,17 +43626,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-V3.1", + "provider_model_id": "deepseek-ai/DeepSeek-V3.1-Terminus", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp4", @@ -42003,38 +43678,38 @@ }, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "deepseek-ai/DeepSeek-V3.1-Terminus", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "name": "DeepSeek: DeepSeek V3.1 Terminus", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", + "permaslug": "deepseek/deepseek-v3.1-terminus", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", + "short_name": "DeepSeek V3.1 Terminus", + "slug": "deepseek/deepseek-v3.1-terminus", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "deepseek", "context_length": 163840, - "created_at": "2025-09-22T13:37:55.611452+00:00", + "created_at": "2025-12-01T13:10:42.818885+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, @@ -42047,6 +43722,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -42056,7 +43732,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "bc1ebdd6-6789-4ac4-95ee-2a9a663f9a1d", + "id": "f270e10b-1d21-4345-9e4e-662ddb5ba441", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -42071,16 +43747,17 @@ "model": { "author": "deepseek", "context_length": 131072, - "created_at": "2025-09-22T13:37:55.611452+00:00", + "created_at": "2025-12-01T13:10:42.818885+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", @@ -42089,40 +43766,35 @@ }, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1-Terminus", + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1 Terminus", + "name": "DeepSeek: DeepSeek V3.2", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.1-terminus", + "permaslug": "deepseek/deepseek-v3.2-20251201", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.1 Terminus", - "slug": "deepseek/deepseek-v3.1-terminus", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.1-terminus", - "model_variant_slug": "deepseek/deepseek-v3.1-terminus", + "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", + "model_variant_slug": "deepseek/deepseek-v3.2", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-v3.1-terminus", + "name": "DeepInfra | deepseek/deepseek-v3.2-20251201", "pricing": { - "completion": "0.00000079", + "completion": "0.00000038", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000168", - "internal_reasoning": "0", - "prompt": "0.00000021", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000013", + "prompt": "0.00000026" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -42207,17 +43879,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-V3.1-Terminus", + "provider_model_id": "deepseek-ai/DeepSeek-V3.2", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp4", @@ -42236,8 +43913,8 @@ "seed", "min_p", "response_format", - "tools", - "tool_choice" + "tool_choice", + "tools" ], "supports_multipart": true, "supports_reasoning": true, @@ -42246,6 +43923,7 @@ "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", @@ -42254,38 +43932,38 @@ }, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1-Terminus", + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1 Terminus", + "name": "DeepSeek: DeepSeek V3.2", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.1-terminus", + "permaslug": "deepseek/deepseek-v3.2-20251201", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.1 Terminus", - "slug": "deepseek/deepseek-v3.1-terminus", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null }, { "author": "deepseek", "context_length": 163840, - "created_at": "2025-12-01T13:10:42.818885+00:00", + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, @@ -42298,7 +43976,6 @@ "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -42308,7 +43985,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f270e10b-1d21-4345-9e4e-662ddb5ba441", + "id": "0f47837d-9d06-4c68-a93c-29b0c7cd9027", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -42322,16 +43999,16 @@ "max_tokens_per_image": null, "model": { "author": "deepseek", - "context_length": 131072, - "created_at": "2025-12-01T13:10:42.818885+00:00", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -42342,40 +44019,35 @@ }, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", - "model_variant_slug": "deepseek/deepseek-v3.2", + "model_variant_permaslug": "deepseek/deepseek-r1-0528", + "model_variant_slug": "deepseek/deepseek-r1-0528", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-v3.2-20251201", + "name": "DeepInfra | deepseek/deepseek-r1-0528", "pricing": { - "completion": "0.00000039", + "completion": "0.00000215", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000013", - "internal_reasoning": "0", - "prompt": "0.00000026", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000035", + "prompt": "0.0000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -42460,17 +44132,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-V3.2", + "provider_model_id": "deepseek-ai/DeepSeek-R1-0528", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp4", @@ -42488,13 +44165,11 @@ "top_k", "seed", "min_p", - "response_format", - "tool_choice", - "tools" + "response_format" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -42508,42 +44183,38 @@ }, "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, { "author": "deepseek", - "context_length": 163840, - "created_at": "2025-09-29T12:54:41.802445+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 - }, + "context_length": 131072, + "created_at": "2025-01-23T20:12:49.780212+00:00", + "default_parameters": {}, "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs.", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -42552,10 +44223,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -42565,7 +44232,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cb6cc53f-6128-45f4-8219-47dbf8faa059", + "id": "281e8d8c-3d82-436d-b61e-6ed75158513c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -42574,65 +44241,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "deepseek", - "context_length": 131072, - "created_at": "2025-09-29T12:54:41.802445+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 - }, + "context_length": 128000, + "created_at": "2025-01-23T20:12:49.780212+00:00", + "default_parameters": {}, "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs.", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "DeepSeek", + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2-Exp", + "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2 Exp", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1 Distill Llama 70B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-exp", + "permaslug": "deepseek/deepseek-r1-distill-llama-70b", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "DeepSeek V3.2 Exp", - "slug": "deepseek/deepseek-v3.2-exp", + "short_name": "R1 Distill Llama 70B", + "slug": "deepseek/deepseek-r1-distill-llama-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.2-exp", - "model_variant_slug": "deepseek/deepseek-v3.2-exp", + "model_variant_permaslug": "deepseek/deepseek-r1-distill-llama-70b", + "model_variant_slug": "deepseek/deepseek-r1-distill-llama-70b", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-v3.2-exp", + "name": "DeepInfra | deepseek/deepseek-r1-distill-llama-70b", "pricing": { - "completion": "0.00000032", + "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000168", - "internal_reasoning": "0", - "prompt": "0.00000021", - "request": "0", - "web_search": "0" + "prompt": "0.0000007" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -42717,21 +44371,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-V3.2-Exp", + "provider_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp4", - "quantization": "fp4", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", @@ -42745,58 +44404,53 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "DeepSeek", + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2-Exp", + "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2 Exp", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1 Distill Llama 70B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-exp", + "permaslug": "deepseek/deepseek-r1-distill-llama-70b", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "DeepSeek V3.2 Exp", - "slug": "deepseek/deepseek-v3.2-exp", + "short_name": "R1 Distill Llama 70B", + "slug": "deepseek/deepseek-r1-distill-llama-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-01-20T13:51:35.96912+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-13T21:50:25.140801+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["", "", ""], "default_system": null, - "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -42809,13 +44463,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "cc004fe3-9ed7-4490-b69a-3b83ab1a1db6", + "id": "eb06dc92-5a16-47ec-a776-6ef956457c47", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -42824,57 +44478,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 163840, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-01-20T13:51:35.96912+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-13T21:50:25.140801+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["", "", ""], "default_system": null, - "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "DeepSeek", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)", + "features": {}, + "group": "Gemini", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1", + "hf_slug": "google/gemma-3-12b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 12B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "google/gemma-3-12b-it", + "reasoning_config": null, "router": null, - "short_name": "R1", - "slug": "deepseek/deepseek-r1", + "short_name": "Gemma 3 12B", + "slug": "google/gemma-3-12b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1", - "model_variant_slug": "deepseek/deepseek-r1", + "model_variant_permaslug": "google/gemma-3-12b-it", + "model_variant_slug": "google/gemma-3-12b-it", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-r1", + "name": "DeepInfra | google/gemma-3-12b-it", "pricing": { - "completion": "0.0000024", + "completion": "0.00000013", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000007", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -42959,24 +44600,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra/base", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-R1", + "provider_model_id": "google/gemma-3-12b-it", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/base", - "quantization": "fp4", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -42987,59 +44631,49 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "DeepSeek", + "features": {}, + "group": "Gemini", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1", + "hf_slug": "google/gemma-3-12b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 12B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "google/gemma-3-12b-it", + "reasoning_config": null, "router": null, - "short_name": "R1", - "slug": "deepseek/deepseek-r1", + "short_name": "Gemma 3 12B", + "slug": "google/gemma-3-12b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-12T05:12:39.645813+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["", "", ""], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -43048,6 +44682,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -43057,74 +44692,68 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0f47837d-9d06-4c68-a93c-29b0c7cd9027", + "id": "8f22002c-c045-446f-a1b9-9896133536b8", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 80, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-12T05:12:39.645813+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["", "", ""], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Gemini", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-0528", - "model_variant_slug": "deepseek/deepseek-r1-0528", + "model_variant_permaslug": "google/gemma-3-27b-it", + "model_variant_slug": "google/gemma-3-27b-it", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-r1-0528", + "name": "DeepInfra | google/gemma-3-27b-it", "pricing": { - "completion": "0.00000215", + "completion": "0.00000016", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000004", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000008" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -43209,24 +44838,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-R1-0528", + "provider_model_id": "google/gemma-3-27b-it", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp4", - "quantization": "fp4", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -43240,7 +44872,7 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" @@ -43248,41 +44880,41 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Gemini", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, { - "author": "deepseek", + "author": "google", "context_length": 131072, - "created_at": "2025-01-23T20:12:49.780212+00:00", + "created_at": "2025-03-13T22:38:30.653142+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["", "", ""], "default_system": null, - "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, @@ -43295,6 +44927,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -43304,7 +44937,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "281e8d8c-3d82-436d-b61e-6ed75158513c", + "id": "d3de3bd4-81bc-48fb-924f-2a87b2a36e75", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -43313,57 +44946,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 128000, - "created_at": "2025-01-23T20:12:49.780212+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-13T22:38:30.653142+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["", "", ""], "default_system": null, - "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Llama3", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", + "features": {}, + "group": "Gemini", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "hf_slug": "google/gemma-3-4b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Llama 70B", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 4B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-llama-70b", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "google/gemma-3-4b-it", + "reasoning_config": null, "router": null, - "short_name": "R1 Distill Llama 70B", - "slug": "deepseek/deepseek-r1-distill-llama-70b", + "short_name": "Gemma 3 4B", + "slug": "google/gemma-3-4b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-distill-llama-70b", - "model_variant_slug": "deepseek/deepseek-r1-distill-llama-70b", + "model_variant_permaslug": "google/gemma-3-4b-it", + "model_variant_slug": "google/gemma-3-4b-it", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-r1-distill-llama-70b", + "name": "DeepInfra | google/gemma-3-4b-it", "pricing": { - "completion": "0.0000012", + "completion": "0.00000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -43448,24 +45068,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "provider_model_id": "google/gemma-3-4b-it", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -43479,50 +45102,46 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Llama3", + "features": {}, + "group": "Gemini", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "hf_slug": "google/gemma-3-4b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Llama 70B", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 4B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-llama-70b", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "google/gemma-3-4b-it", + "reasoning_config": null, "router": null, - "short_name": "R1 Distill Llama 70B", - "slug": "deepseek/deepseek-r1-distill-llama-70b", + "short_name": "Gemma 3 4B", + "slug": "google/gemma-3-4b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-01-29T23:53:50.865297+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "intfloat", + "context_length": 512, + "created_at": "2025-11-18T02:33:12.746929+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\\n\\nOther benchmark results include:\\n\\n- AIME 2024 pass@1: 72.6\\n- MATH-500 pass@1: 94.3\\n- CodeForces Rating: 1691\\n\\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "The e5-base-v2 embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, similarity scoring, retrieval and clustering.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -43531,7 +45150,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -43541,7 +45159,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "412eb973-02c5-45e3-8255-4c2bfa763338", + "id": "77d47f3a-17aa-45a4-9737-59fdaddabd85", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -43554,53 +45172,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 128000, - "created_at": "2025-01-29T23:53:50.865297+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "intfloat", + "context_length": 8192, + "created_at": "2025-11-18T02:33:12.746929+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\\n\\nOther benchmark results include:\\n\\n- AIME 2024 pass@1: 72.6\\n- MATH-500 pass@1: 94.3\\n- CodeForces Rating: 1691\\n\\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "The e5-base-v2 embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, similarity scoring, retrieval and clustering.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen", - "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "group": "Other", + "has_text_output": false, + "hf_slug": "intfloat/e5-base-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Qwen 32B", - "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-qwen-32b", + "instruct_type": null, + "model_version_group_id": null, + "name": "Intfloat: E5-Base-v2", + "output_modalities": ["embeddings"], + "permaslug": "intfloat/e5-base-v2-20251117", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Qwen 32B", - "slug": "deepseek/deepseek-r1-distill-qwen-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "E5-Base-v2", + "slug": "intfloat/e5-base-v2", + "updated_at": "2025-11-18T16:29:22.862506+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-distill-qwen-32b", - "model_variant_slug": "deepseek/deepseek-r1-distill-qwen-32b", + "model_variant_permaslug": "intfloat/e5-base-v2-20251117", + "model_variant_slug": "intfloat/e5-base-v2", "moderation_required": false, - "name": "DeepInfra | deepseek/deepseek-r1-distill-qwen-32b", + "name": "DeepInfra | intfloat/e5-base-v2-20251117", "pricing": { - "completion": "0.00000027", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.000000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -43685,24 +45305,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "provider_model_id": "intfloat/e5-base-v2", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -43716,50 +45339,57 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen", - "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "group": "Other", + "has_text_output": false, + "hf_slug": "intfloat/e5-base-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Qwen 32B", - "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-qwen-32b", + "instruct_type": null, + "model_version_group_id": null, + "name": "Intfloat: E5-Base-v2", + "output_modalities": ["embeddings"], + "permaslug": "intfloat/e5-base-v2-20251117", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Qwen 32B", - "slug": "deepseek/deepseek-r1-distill-qwen-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "E5-Base-v2", + "slug": "intfloat/e5-base-v2", + "updated_at": "2025-11-18T16:29:22.862506+00:00", "warning_message": null }, { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-13T21:50:25.140801+00:00", - "default_parameters": {}, - "default_stops": ["", "", ""], + "author": "intfloat", + "context_length": 512, + "created_at": "2025-11-18T02:37:12.233088+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)", + "description": "The e5-large-v2 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-accuracy semantic embeddings optimized for retrieval, semantic search, reranking, and similarity-scoring tasks.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -43768,7 +45398,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -43778,7 +45407,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "eb06dc92-5a16-47ec-a776-6ef956457c47", + "id": "2e010c48-bb02-460e-8308-d5f3543cd9f0", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -43791,45 +45420,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-13T21:50:25.140801+00:00", - "default_parameters": {}, - "default_stops": ["", "", ""], + "author": "intfloat", + "context_length": 8192, + "created_at": "2025-11-18T02:37:12.233088+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)", - "features": {}, - "group": "Gemini", - "has_text_output": true, - "hf_slug": "google/gemma-3-12b-it", + "description": "The e5-large-v2 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-accuracy semantic embeddings optimized for retrieval, semantic search, reranking, and similarity-scoring tasks.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": false, + "hf_slug": "intfloat/e5-large-v2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 12B", - "output_modalities": ["text"], - "permaslug": "google/gemma-3-12b-it", - "reasoning_config": null, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Intfloat: E5-Large-v2", + "output_modalities": ["embeddings"], + "permaslug": "intfloat/e5-large-v2-20251117", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Gemma 3 12B", - "slug": "google/gemma-3-12b-it", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "E5-Large-v2", + "slug": "intfloat/e5-large-v2", + "updated_at": "2025-11-18T16:29:07.784492+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3-12b-it", - "model_variant_slug": "google/gemma-3-12b-it", + "model_variant_permaslug": "intfloat/e5-large-v2-20251117", + "model_variant_slug": "intfloat/e5-large-v2", "moderation_required": false, - "name": "DeepInfra | google/gemma-3-12b-it", + "name": "DeepInfra | intfloat/e5-large-v2-20251117", "pricing": { - "completion": "0.00000013", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000001" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -43914,21 +45553,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "google/gemma-3-12b-it", + "provider_model_id": "intfloat/e5-large-v2", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -43948,41 +45592,52 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Gemini", - "has_text_output": true, - "hf_slug": "google/gemma-3-12b-it", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": false, + "hf_slug": "intfloat/e5-large-v2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 12B", - "output_modalities": ["text"], - "permaslug": "google/gemma-3-12b-it", - "reasoning_config": null, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Intfloat: E5-Large-v2", + "output_modalities": ["embeddings"], + "permaslug": "intfloat/e5-large-v2-20251117", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Gemma 3 12B", - "slug": "google/gemma-3-12b-it", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "E5-Large-v2", + "slug": "intfloat/e5-large-v2", + "updated_at": "2025-11-18T16:29:07.784492+00:00", "warning_message": null }, { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-12T05:12:39.645813+00:00", + "author": "intfloat", + "context_length": 512, + "created_at": "2025-11-18T02:30:47.410918+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["", "", ""], + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "The multilingual-e5-large embedding model encodes sentences, paragraphs, and documents across over 90 languages into a 1024-dimensional dense vector space, delivering robust semantic embeddings optimized for multilingual retrieval, cross-language similarity, and large-scale data search.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -43991,7 +45646,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -44001,30 +45655,30 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8f22002c-c045-446f-a1b9-9896133536b8", + "id": "5580bd22-b8af-45b6-8e3e-c517e388e2c2", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 80, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-12T05:12:39.645813+00:00", + "author": "intfloat", + "context_length": 8192, + "created_at": "2025-11-18T02:30:47.410918+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["", "", ""], + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "The multilingual-e5-large embedding model encodes sentences, paragraphs, and documents across over 90 languages into a 1024-dimensional dense vector space, delivering robust semantic embeddings optimized for multilingual retrieval, cross-language similarity, and large-scale data search.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -44033,41 +45687,36 @@ "system_prompt": null } }, - "group": "Gemini", - "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "group": "Other", + "has_text_output": false, + "hf_slug": "intfloat/multilingual-e5-large", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", - "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Intfloat: Multilingual-E5-Large", + "output_modalities": ["embeddings"], + "permaslug": "intfloat/multilingual-e5-large-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Multilingual-E5-Large", + "slug": "intfloat/multilingual-e5-large", + "updated_at": "2025-11-18T16:29:36.757792+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3-27b-it", - "model_variant_slug": "google/gemma-3-27b-it", + "model_variant_permaslug": "intfloat/multilingual-e5-large-20251117", + "model_variant_slug": "intfloat/multilingual-e5-large", "moderation_required": false, - "name": "DeepInfra | google/gemma-3-27b-it", + "name": "DeepInfra | intfloat/multilingual-e5-large-20251117", "pricing": { - "completion": "0.00000016", + "completion": "0", "discount": 0, - "image": "0.0000256", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000001" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -44152,21 +45801,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "google/gemma-3-27b-it", + "provider_model_id": "intfloat/multilingual-e5-large", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/fp32", + "quantization": "fp32", "supported_parameters": [ "max_tokens", "temperature", @@ -44194,40 +45848,40 @@ "system_prompt": null } }, - "group": "Gemini", - "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "group": "Other", + "has_text_output": false, + "hf_slug": "intfloat/multilingual-e5-large", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", - "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Intfloat: Multilingual-E5-Large", + "output_modalities": ["embeddings"], + "permaslug": "intfloat/multilingual-e5-large-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Multilingual-E5-Large", + "slug": "intfloat/multilingual-e5-large", + "updated_at": "2025-11-18T16:29:36.757792+00:00", "warning_message": null }, { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-13T22:38:30.653142+00:00", + "author": "meta-llama", + "context_length": 8192, + "created_at": "2024-04-18T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["", "", ""], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -44236,17 +45890,16 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "d3de3bd4-81bc-48fb-924f-2a87b2a36e75", + "id": "c6e34375-c207-4d60-9a43-38b2b730815a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -44255,49 +45908,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-13T22:38:30.653142+00:00", + "author": "meta-llama", + "context_length": 8192, + "created_at": "2024-04-18T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["", "", ""], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "features": {}, - "group": "Gemini", + "group": "Llama3", "has_text_output": true, - "hf_slug": "google/gemma-3-4b-it", + "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 4B", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3 8B Instruct", "output_modalities": ["text"], - "permaslug": "google/gemma-3-4b-it", + "permaslug": "meta-llama/llama-3-8b-instruct", "reasoning_config": null, "router": null, - "short_name": "Gemma 3 4B", - "slug": "google/gemma-3-4b-it", + "short_name": "Llama 3 8B Instruct", + "slug": "meta-llama/llama-3-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3-4b-it", - "model_variant_slug": "google/gemma-3-4b-it", + "model_variant_permaslug": "meta-llama/llama-3-8b-instruct", + "model_variant_slug": "meta-llama/llama-3-8b-instruct", "moderation_required": false, - "name": "DeepInfra | google/gemma-3-4b-it", + "name": "DeepInfra | meta-llama/llama-3-8b-instruct", "pricing": { - "completion": "0.00000008", + "completion": "0.00000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000003" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -44382,17 +46030,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "google/gemma-3-4b-it", + "provider_model_id": "meta-llama/Meta-Llama-3-8B-Instruct", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/bf16", @@ -44408,49 +46061,47 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Gemini", + "group": "Llama3", "has_text_output": true, - "hf_slug": "google/gemma-3-4b-it", + "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 4B", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3 8B Instruct", "output_modalities": ["text"], - "permaslug": "google/gemma-3-4b-it", + "permaslug": "meta-llama/llama-3-8b-instruct", "reasoning_config": null, "router": null, - "short_name": "Gemma 3 4B", - "slug": "google/gemma-3-4b-it", + "short_name": "Llama 3 8B Instruct", + "slug": "meta-llama/llama-3-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "intfloat", - "context_length": 512, - "created_at": "2025-11-18T02:33:12.746929+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "The e5-base-v2 embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, similarity scoring, retrieval and clustering.", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 512, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -44459,16 +46110,19 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "77d47f3a-17aa-45a4-9737-59fdaddabd85", + "id": "59c87462-40b6-4231-91cf-6d0f8f25e8b9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -44481,60 +46135,40 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "intfloat", - "context_length": 8192, - "created_at": "2025-11-18T02:33:12.746929+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "The e5-base-v2 embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, similarity scoring, retrieval and clustering.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "intfloat/e5-base-v2", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Intfloat: E5-Base-v2", - "output_modalities": ["embeddings"], - "permaslug": "intfloat/e5-base-v2-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.1 70B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.1-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "E5-Base-v2", - "slug": "intfloat/e5-base-v2", - "updated_at": "2025-11-18T16:29:22.862506+00:00", + "short_name": "Llama 3.1 70B Instruct", + "slug": "meta-llama/llama-3.1-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "intfloat/e5-base-v2-20251117", - "model_variant_slug": "intfloat/e5-base-v2", + "model_variant_permaslug": "meta-llama/llama-3.1-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-70b-instruct", "moderation_required": false, - "name": "DeepInfra | intfloat/e5-base-v2-20251117", + "name": "DeepInfra | meta-llama/llama-3.1-70b-instruct", "pricing": { - "completion": "0", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -44619,22 +46253,28 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra/base", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "intfloat/e5-base-v2", + "provider_model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra", - "quantization": "unknown", + "provider_slug": "deepinfra/base", + "quantization": "bf16", "supported_parameters": [ + "response_format", "max_tokens", "temperature", "top_p", @@ -44645,60 +46285,46 @@ "top_k", "seed", "min_p", - "response_format" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "intfloat/e5-base-v2", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Intfloat: E5-Base-v2", - "output_modalities": ["embeddings"], - "permaslug": "intfloat/e5-base-v2-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.1 70B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.1-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "E5-Base-v2", - "slug": "intfloat/e5-base-v2", - "updated_at": "2025-11-18T16:29:22.862506+00:00", + "short_name": "Llama 3.1 70B Instruct", + "slug": "meta-llama/llama-3.1-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "intfloat", - "context_length": 512, - "created_at": "2025-11-18T02:37:12.233088+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "The e5-large-v2 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-accuracy semantic embeddings optimized for retrieval, semantic search, reranking, and similarity-scoring tasks.", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 512, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -44707,16 +46333,20 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "2e010c48-bb02-460e-8308-d5f3543cd9f0", + "id": "858e9b98-fa86-433e-8299-17c3c4d6c24f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -44725,64 +46355,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "intfloat", - "context_length": 8192, - "created_at": "2025-11-18T02:37:12.233088+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "The e5-large-v2 embedding model maps English sentences, paragraphs, and documents into a 1024-dimensional dense vector space, delivering high-accuracy semantic embeddings optimized for retrieval, semantic search, reranking, and similarity-scoring tasks.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "intfloat/e5-large-v2", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Intfloat: E5-Large-v2", - "output_modalities": ["embeddings"], - "permaslug": "intfloat/e5-large-v2-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.1-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "E5-Large-v2", - "slug": "intfloat/e5-large-v2", - "updated_at": "2025-11-18T16:29:07.784492+00:00", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "intfloat/e5-large-v2-20251117", - "model_variant_slug": "intfloat/e5-large-v2", + "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", "moderation_required": false, - "name": "DeepInfra | intfloat/e5-large-v2-20251117", + "name": "DeepInfra | meta-llama/llama-3.1-8b-instruct", "pricing": { - "completion": "0", + "completion": "0.00000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000002" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -44867,22 +46477,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "intfloat/e5-large-v2", + "provider_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra", - "quantization": "unknown", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -44893,60 +46510,46 @@ "top_k", "seed", "min_p", - "response_format" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "intfloat/e5-large-v2", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Intfloat: E5-Large-v2", - "output_modalities": ["embeddings"], - "permaslug": "intfloat/e5-large-v2-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.1-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "E5-Large-v2", - "slug": "intfloat/e5-large-v2", - "updated_at": "2025-11-18T16:29:07.784492+00:00", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "intfloat", - "context_length": 512, - "created_at": "2025-11-18T02:30:47.410918+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-09-25T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "The multilingual-e5-large embedding model encodes sentences, paragraphs, and documents across over 90 languages into a 1024-dimensional dense vector space, delivering robust semantic embeddings optimized for multilingual retrieval, cross-language similarity, and large-scale data search.", + "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 512, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -44964,7 +46567,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5580bd22-b8af-45b6-8e3e-c517e388e2c2", + "id": "4a07b512-e030-412d-b1d6-39773a8b8dcf", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -44973,64 +46576,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "intfloat", - "context_length": 8192, - "created_at": "2025-11-18T02:30:47.410918+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-09-25T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "The multilingual-e5-large embedding model encodes sentences, paragraphs, and documents across over 90 languages into a 1024-dimensional dense vector space, delivering robust semantic embeddings optimized for multilingual retrieval, cross-language similarity, and large-scale data search.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "intfloat/multilingual-e5-large", + "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.2-11B-Vision-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, + "input_modalities": ["text", "image"], + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Intfloat: Multilingual-E5-Large", - "output_modalities": ["embeddings"], - "permaslug": "intfloat/multilingual-e5-large-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "name": "Meta: Llama 3.2 11B Vision Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "reasoning_config": null, "router": null, - "short_name": "Multilingual-E5-Large", - "slug": "intfloat/multilingual-e5-large", - "updated_at": "2025-11-18T16:29:36.757792+00:00", + "short_name": "Llama 3.2 11B Vision Instruct", + "slug": "meta-llama/llama-3.2-11b-vision-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "intfloat/multilingual-e5-large-20251117", - "model_variant_slug": "intfloat/multilingual-e5-large", + "model_variant_permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "model_variant_slug": "meta-llama/llama-3.2-11b-vision-instruct", "moderation_required": false, - "name": "DeepInfra | intfloat/multilingual-e5-large-20251117", + "name": "DeepInfra | meta-llama/llama-3.2-11b-vision-instruct", "pricing": { - "completion": "0", + "completion": "0.000000049", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001", - "request": "0", - "web_search": "0" + "prompt": "0.000000049" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -45115,21 +46698,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "intfloat/multilingual-e5-large", + "provider_model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp32", - "quantization": "fp32", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -45149,44 +46737,33 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "intfloat/multilingual-e5-large", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.2-11B-Vision-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, + "input_modalities": ["text", "image"], + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Intfloat: Multilingual-E5-Large", - "output_modalities": ["embeddings"], - "permaslug": "intfloat/multilingual-e5-large-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "name": "Meta: Llama 3.2 11B Vision Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "reasoning_config": null, "router": null, - "short_name": "Multilingual-E5-Large", - "slug": "intfloat/multilingual-e5-large", - "updated_at": "2025-11-18T16:29:36.757792+00:00", + "short_name": "Llama 3.2 11B Vision Instruct", + "slug": "meta-llama/llama-3.2-11b-vision-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", "context_length": 131072, - "created_at": "2025-02-12T23:01:58.468577+00:00", + "created_at": "2024-09-25T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, @@ -45199,7 +46776,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -45209,7 +46785,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "485ff17d-72b9-442f-aa87-299e7a86832b", + "id": "e462c1ad-93b6-4047-b27d-239e3ba51989", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -45218,49 +46794,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "meta-llama", - "context_length": 0, - "created_at": "2025-02-12T23:01:58.468577+00:00", + "context_length": 131072, + "created_at": "2024-09-25T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-3-8B", + "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "none", + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Llama Guard 3 8B", + "name": "Meta: Llama 3.2 3B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-3-8b", + "permaslug": "meta-llama/llama-3.2-3b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama Guard 3 8B", - "slug": "meta-llama/llama-guard-3-8b", + "short_name": "Llama 3.2 3B Instruct", + "slug": "meta-llama/llama-3.2-3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-guard-3-8b", - "model_variant_slug": "meta-llama/llama-guard-3-8b", + "model_variant_permaslug": "meta-llama/llama-3.2-3b-instruct", + "model_variant_slug": "meta-llama/llama-3.2-3b-instruct", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-guard-3-8b", + "name": "DeepInfra | meta-llama/llama-3.2-3b-instruct", "pricing": { - "completion": "0.000000055", + "completion": "0.00000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000055", - "request": "0", - "web_search": "0" + "prompt": "0.00000002" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -45345,17 +46916,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Llama-Guard-3-8B", + "provider_model_id": "meta-llama/Llama-3.2-3B-Instruct", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/bf16", @@ -45382,34 +46958,34 @@ "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-3-8B", + "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "none", + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Llama Guard 3 8B", + "name": "Meta: Llama 3.2 3B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-3-8b", + "permaslug": "meta-llama/llama-3.2-3b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama Guard 3 8B", - "slug": "meta-llama/llama-guard-3-8b", + "short_name": "Llama 3.2 3B Instruct", + "slug": "meta-llama/llama-3.2-3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -45427,7 +47003,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "65aab538-54e0-41fa-9b74-687e4552302f", + "id": "e3b0a527-44d6-4ea6-9ec2-6a6416a84c7c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -45441,46 +47017,41 @@ "max_tokens_per_image": null, "model": { "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-70B-Instruct", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "llama3", "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3 70B Instruct", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-70b-instruct", + "permaslug": "meta-llama/llama-3.3-70b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 70B Instruct", - "slug": "meta-llama/llama-3-70b-instruct", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3-70b-instruct", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-3-70b-instruct", + "name": "DeepInfra | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.0000004", + "completion": "0.00000032", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "DeepInfra (Turbo)", "provider_info": { "adapterName": "DeepInfraAdapter", "baseUrl": "https://api.deepinfra.com/v1/openai", @@ -45492,7 +47063,7 @@ "termsOfServiceURL": "https://deepinfra.com/terms", "training": false }, - "displayName": "DeepInfra", + "displayName": "DeepInfra (Turbo)", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, @@ -45563,21 +47134,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra/turbo", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Meta-Llama-3-70B-Instruct", + "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/turbo", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -45602,34 +47178,34 @@ "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-70B-Instruct", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "llama3", "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3 70B Instruct", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-70b-instruct", + "permaslug": "meta-llama/llama-3.3-70b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 70B Instruct", - "slug": "meta-llama/llama-3-70b-instruct", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "context_length": 1048576, + "created_at": "2025-04-05T19:37:02.129674+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -45638,16 +47214,20 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "c6e34375-c207-4d60-9a43-38b2b730815a", + "id": "69a5d06e-1935-4aa5-903f-71058e64399f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -45658,47 +47238,42 @@ "limit_rpm_cf": null, "max_completion_tokens": 16384, "max_prompt_tokens": null, - "max_tokens_per_image": null, + "max_tokens_per_image": 3342, "model": { "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "context_length": 1048576, + "created_at": "2025-04-05T19:37:02.129674+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", "features": {}, - "group": "Llama3", + "group": "Llama4", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3 8B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-8b-instruct", + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 8B Instruct", - "slug": "meta-llama/llama-3-8b-instruct", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3-8b-instruct", - "model_variant_slug": "meta-llama/llama-3-8b-instruct", + "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "model_variant_slug": "meta-llama/llama-4-maverick", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-3-8b-instruct", + "name": "DeepInfra | meta-llama/llama-4-maverick-17b-128e-instruct", "pricing": { - "completion": "0.00000006", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -45783,22 +47358,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra/base", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Meta-Llama-3-8B-Instruct", + "provider_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/base", + "quantization": "fp8", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -45808,48 +47390,45 @@ "repetition_penalty", "top_k", "seed", - "min_p", - "response_format", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Llama4", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3 8B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-8b-instruct", + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 8B Instruct", - "slug": "meta-llama/llama-3-8b-instruct", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "context_length": 327680, + "created_at": "2025-04-05T19:31:59.735804+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 327680, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -45858,9 +47437,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -45870,7 +47447,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "59c87462-40b6-4231-91cf-6d0f8f25e8b9", + "id": "9cf05ded-eefe-41b4-8c08-0c6460feffea", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -45879,49 +47456,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, - "max_tokens_per_image": null, + "max_tokens_per_image": 3342, "model": { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "context_length": 10000000, + "created_at": "2025-04-05T19:31:59.735804+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", "features": {}, - "group": "Llama3", + "group": "Llama4", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.1 70B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-70b-instruct", + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 70B Instruct", - "slug": "meta-llama/llama-3.1-70b-instruct", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-70b-instruct", + "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "model_variant_slug": "meta-llama/llama-4-scout", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-3.1-70b-instruct", + "name": "DeepInfra | meta-llama/llama-4-scout-17b-16e-instruct", "pricing": { - "completion": "0.0000004", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000008" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -46006,23 +47578,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra/base", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "provider_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/base", - "quantization": "bf16", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ - "response_format", "max_tokens", "temperature", "top_p", @@ -46033,46 +47609,45 @@ "top_k", "seed", "min_p", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Llama4", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.1 70B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-70b-instruct", + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 70B Instruct", - "slug": "meta-llama/llama-3.1-70b-instruct", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "context_length": 163840, + "created_at": "2025-04-30T01:06:33.531556+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 163840, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -46081,20 +47656,17 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "858e9b98-fa86-433e-8299-17c3c4d6c24f", + "id": "850b84c3-42a7-4cec-99c0-b5582d0da66b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -46103,49 +47675,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "context_length": 163840, + "created_at": "2025-04-30T01:06:33.531556+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "meta-llama/Llama-Guard-4-12B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "input_modalities": ["image", "text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama Guard 4 12B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", + "permaslug": "meta-llama/llama-guard-4-12b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "Llama Guard 4 12B", + "slug": "meta-llama/llama-guard-4-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_permaslug": "meta-llama/llama-guard-4-12b", + "model_variant_slug": "meta-llama/llama-guard-4-12b", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-3.1-8b-instruct", + "name": "DeepInfra | meta-llama/llama-guard-4-12b", "pricing": { - "completion": "0.00000005", + "completion": "0.00000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000018" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -46230,24 +47797,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "provider_model_id": "meta-llama/Llama-Guard-4-12B", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/bf16", "quantization": "bf16", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -46258,46 +47828,45 @@ "top_k", "seed", "min_p", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "meta-llama/Llama-Guard-4-12B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "input_modalities": ["image", "text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama Guard 4 12B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", + "permaslug": "meta-llama/llama-guard-4-12b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "Llama Guard 4 12B", + "slug": "meta-llama/llama-guard-4-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", + "author": "microsoft", + "context_length": 16384, + "created_at": "2025-01-10T06:17:52.16346+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 16384, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -46315,7 +47884,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "4a07b512-e030-412d-b1d6-39773a8b8dcf", + "id": "8e48942d-17fc-4d00-a234-4723034fd971", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -46325,48 +47894,43 @@ "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": 16384, - "max_prompt_tokens": null, + "max_prompt_tokens": 4096, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", + "author": "microsoft", + "context_length": 16384, + "created_at": "2025-01-10T06:17:52.16346+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "hf_slug": "microsoft/phi-4", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "llama3", + "input_modalities": ["text"], + "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 3.2 11B Vision Instruct", + "name": "Microsoft: Phi 4", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "permaslug": "microsoft/phi-4", "reasoning_config": null, "router": null, - "short_name": "Llama 3.2 11B Vision Instruct", - "slug": "meta-llama/llama-3.2-11b-vision-instruct", + "short_name": "Phi 4", + "slug": "microsoft/phi-4", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.2-11b-vision-instruct", - "model_variant_slug": "meta-llama/llama-3.2-11b-vision-instruct", + "model_variant_permaslug": "microsoft/phi-4", + "model_variant_slug": "microsoft/phi-4", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-3.2-11b-vision-instruct", + "name": "DeepInfra | microsoft/phi-4", "pricing": { - "completion": "0.000000049", + "completion": "0.00000014", "discount": 0, - "image": "0.00007948", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000049", - "request": "0", - "web_search": "0" + "prompt": "0.00000007" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -46451,21 +48015,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "provider_model_id": "microsoft/phi-4", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", @@ -46486,36 +48055,40 @@ "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "hf_slug": "microsoft/phi-4", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "llama3", + "input_modalities": ["text"], + "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 3.2 11B Vision Instruct", + "name": "Microsoft: Phi 4", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "permaslug": "microsoft/phi-4", "reasoning_config": null, "router": null, - "short_name": "Llama 3.2 11B Vision Instruct", - "slug": "meta-llama/llama-3.2-11b-vision-instruct", + "short_name": "Phi 4", + "slug": "microsoft/phi-4", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], - "default_system": null, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "author": "minimax", + "context_length": 196608, + "created_at": "2025-12-23T01:56:37+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.9 + }, + "default_stops": [], + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 196608, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -46524,6 +48097,8 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -46533,7 +48108,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "e462c1ad-93b6-4047-b27d-239e3ba51989", + "id": "64da1c5c-a186-4b10-8ee6-32622dfb26ca", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -46542,49 +48117,62 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], - "default_system": null, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.9 + }, + "default_stops": [], + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 3.2 3B Instruct", + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-3b-instruct", - "reasoning_config": null, + "permaslug": "minimax/minimax-m2.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.2 3B Instruct", - "slug": "meta-llama/llama-3.2-3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.2-3b-instruct", - "model_variant_slug": "meta-llama/llama-3.2-3b-instruct", + "model_variant_permaslug": "minimax/minimax-m2.1", + "model_variant_slug": "minimax/minimax-m2.1", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-3.2-3b-instruct", + "name": "DeepInfra | minimax/minimax-m2.1", "pricing": { - "completion": "0.00000002", + "completion": "0.00000095", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000000299999997", + "prompt": "0.00000027" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -46669,22 +48257,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Llama-3.2-3B-Instruct", + "provider_model_id": "MiniMaxAI/MiniMax-M2.1", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -46695,45 +48290,62 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 3.2 3B Instruct", + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-3b-instruct", - "reasoning_config": null, + "permaslug": "minimax/minimax-m2.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.2 3B Instruct", - "slug": "meta-llama/llama-3.2-3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 32768, - "created_at": "2024-09-25T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "mistralai", + "context_length": 131072, + "created_at": "2024-07-19T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\n\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -46751,9 +48363,9 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "44ad2a0c-6f79-43be-a970-b581132008b9", + "id": "3e0aa374-de51-4cd0-911b-29ae65067e21", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, @@ -46764,45 +48376,42 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", + "author": "mistralai", "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "created_at": "2024-07-19T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "The Llama 90B Vision model is a top-tier, 90-billion-parameter multimodal model designed for the most challenging visual reasoning and language tasks. It offers unparalleled accuracy in image captioning, visual question answering, and advanced image-text comprehension. Pre-trained on vast multimodal datasets and fine-tuned with human feedback, the Llama 90B Vision is engineered to handle the most demanding image-based AI tasks.\n\nThis model is perfect for industries requiring cutting-edge multimodal AI capabilities, particularly those dealing with complex, real-time visual and textual analysis.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", "features": {}, - "group": "Llama3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-90B-Vision-Instruct", + "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "llama3", + "input_modalities": ["text"], + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Meta: Llama 3.2 90B Vision Instruct", + "name": "Mistral: Mistral Nemo", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-90b-vision-instruct", + "permaslug": "mistralai/mistral-nemo", "reasoning_config": null, "router": null, - "short_name": "Llama 3.2 90B Vision Instruct", - "slug": "meta-llama/llama-3.2-90b-vision-instruct", + "short_name": "Mistral Nemo", + "slug": "mistralai/mistral-nemo", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.2-90b-vision-instruct", - "model_variant_slug": "meta-llama/llama-3.2-90b-vision-instruct", + "model_variant_permaslug": "mistralai/mistral-nemo", + "model_variant_slug": "mistralai/mistral-nemo", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-3.2-90b-vision-instruct", + "name": "DeepInfra | mistralai/mistral-nemo", "pricing": { - "completion": "0.0000004", + "completion": "0.00000004", "discount": 0, - "image": "0.0005058", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000035", - "request": "0", - "web_search": "0" + "prompt": "0.00000002" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -46887,21 +48496,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct", + "provider_model_id": "mistralai/Mistral-Nemo-Instruct-2407", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -46922,36 +48536,40 @@ "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-90B-Vision-Instruct", + "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "llama3", + "input_modalities": ["text"], + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Meta: Llama 3.2 90B Vision Instruct", + "name": "Mistral: Mistral Nemo", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-90b-vision-instruct", + "permaslug": "mistralai/mistral-nemo", "reasoning_config": null, "router": null, - "short_name": "Llama 3.2 90B Vision Instruct", - "slug": "meta-llama/llama-3.2-90b-vision-instruct", + "short_name": "Mistral Nemo", + "slug": "mistralai/mistral-nemo", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "mistralai", + "context_length": 32768, + "created_at": "2025-01-30T16:43:29.33592+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.3, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -46963,13 +48581,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "e3b0a527-44d6-4ea6-9ec2-6a6416a84c7c", + "id": "a911739a-dc2e-424d-a221-5c776f1552f0", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -46982,47 +48600,57 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "mistralai", + "context_length": 32768, + "created_at": "2025-01-30T16:43:29.33592+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.3, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", - "features": {}, - "group": "Llama3", + "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Mistral: Mistral Small 3", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Mistral Small 3", + "slug": "mistralai/mistral-small-24b-instruct-2501", + "updated_at": "2025-12-16T18:22:59.07006+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "mistralai/mistral-small-24b-instruct-2501", + "model_variant_slug": "mistralai/mistral-small-24b-instruct-2501", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-3.3-70b-instruct", + "name": "DeepInfra | mistralai/mistral-small-24b-instruct-2501", "pricing": { - "completion": "0.00000032", + "completion": "0.00000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000005" }, - "provider_display_name": "DeepInfra (Turbo)", + "provider_display_name": "DeepInfra", "provider_info": { "adapterName": "DeepInfraAdapter", "baseUrl": "https://api.deepinfra.com/v1/openai", @@ -47034,7 +48662,7 @@ "termsOfServiceURL": "https://deepinfra.com/terms", "training": false }, - "displayName": "DeepInfra (Turbo)", + "displayName": "DeepInfra", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, @@ -47105,20 +48733,25 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra/turbo", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "provider_model_id": "mistralai/Mistral-Small-24B-Instruct-2501", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/turbo", + "provider_slug": "deepinfra/fp8", "quantization": "fp8", "supported_parameters": [ "max_tokens", @@ -47131,47 +48764,58 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Mistral: Mistral Small 3", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Mistral Small 3", + "slug": "mistralai/mistral-small-24b-instruct-2501", + "updated_at": "2025-12-16T18:22:59.07006+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 128000, + "created_at": "2025-06-20T18:10:16.960494+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 1048576, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -47180,20 +48824,16 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "69a5d06e-1935-4aa5-903f-71058e64399f", + "id": "0ebc91cd-74bd-44ab-9c9a-c1f17d19f128", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -47202,49 +48842,46 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, - "max_tokens_per_image": 3342, + "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 128000, + "created_at": "2025-06-20T18:10:16.960494+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "name": "Mistral: Mistral Small 3.2 24B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Mistral Small 3.2 24B", + "slug": "mistralai/mistral-small-3.2-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "model_variant_slug": "meta-llama/llama-4-maverick", + "model_variant_permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "model_variant_slug": "mistralai/mistral-small-3.2-24b-instruct", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-4-maverick-17b-128e-instruct", + "name": "DeepInfra | mistralai/mistral-small-3.2-24b-instruct-2506", "pricing": { - "completion": "0.0000006", + "completion": "0.0000002", "discount": 0, - "image": "0.0006684", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.000000075" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -47329,24 +48966,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra/base", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "provider_model_id": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/base", + "provider_slug": "deepinfra/fp8", "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -47356,7 +48996,8 @@ "repetition_penalty", "top_k", "seed", - "min_p" + "min_p", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, @@ -47365,36 +49006,38 @@ "variant": "standard" }, "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "name": "Mistral: Mistral Small 3.2 24B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Mistral Small 3.2 24B", + "slug": "mistralai/mistral-small-3.2-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 327680, - "created_at": "2025-04-05T19:31:59.735804+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "mistralai", + "context_length": 32768, + "created_at": "2023-12-10T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 327680, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -47403,7 +49046,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -47413,7 +49055,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9cf05ded-eefe-41b4-8c08-0c6460feffea", + "id": "a840d695-b1b6-411b-8876-4a6d51218c1e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -47424,47 +49066,44 @@ "limit_rpm_cf": null, "max_completion_tokens": 16384, "max_prompt_tokens": null, - "max_tokens_per_image": 3342, + "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 10000000, - "created_at": "2025-04-05T19:31:59.735804+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "mistralai", + "context_length": 32768, + "created_at": "2023-12-10T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "mistralai/Mixtral-8x7B-Instruct-v0.1", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "name": "Mistral: Mixtral 8x7B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "permaslug": "mistralai/mixtral-8x7b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Mixtral 8x7B Instruct", + "slug": "mistralai/mixtral-8x7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "model_variant_slug": "meta-llama/llama-4-scout", + "model_variant_permaslug": "mistralai/mixtral-8x7b-instruct", + "model_variant_slug": "mistralai/mixtral-8x7b-instruct", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-4-scout-17b-16e-instruct", + "name": "DeepInfra | mistralai/mixtral-8x7b-instruct", "pricing": { - "completion": "0.0000003", + "completion": "0.00000054", "discount": 0, - "image": "0.0003342", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "prompt": "0.00000054" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -47549,17 +49188,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "provider_model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", @@ -47575,45 +49219,47 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "mistralai/Mixtral-8x7B-Instruct-v0.1", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "name": "Mistral: Mixtral 8x7B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "permaslug": "mistralai/mixtral-8x7b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Mixtral 8x7B Instruct", + "slug": "mistralai/mixtral-8x7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 163840, - "created_at": "2025-04-30T01:06:33.531556+00:00", + "author": "moonshotai", + "context_length": 131000, + "created_at": "2025-07-11T19:47:32.565514+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 131000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -47622,17 +49268,17 @@ "training": false }, "features": { - "supported_parameters": {}, + "supports_multipart": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "850b84c3-42a7-4cec-99c0-b5582d0da66b", + "id": "810046fa-c026-4ccc-b1ef-dfc98be9b76f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -47645,45 +49291,50 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 163840, - "created_at": "2025-04-30T01:06:33.531556+00:00", + "author": "moonshotai", + "context_length": 131072, + "created_at": "2025-07-11T19:47:32.565514+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", - "features": {}, + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-4-12B", + "hf_slug": "moonshotai/Kimi-K2-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama Guard 4 12B", + "name": "MoonshotAI: Kimi K2 0711", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-4-12b", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama Guard 4 12B", - "slug": "meta-llama/llama-guard-4-12b", + "short_name": "Kimi K2 0711", + "slug": "moonshotai/kimi-k2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-guard-4-12b", - "model_variant_slug": "meta-llama/llama-guard-4-12b", + "model_variant_permaslug": "moonshotai/kimi-k2", + "model_variant_slug": "moonshotai/kimi-k2", "moderation_required": false, - "name": "DeepInfra | meta-llama/llama-guard-4-12b", + "name": "DeepInfra | moonshotai/kimi-k2", "pricing": { - "completion": "0.00000018", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000018", - "request": "0", - "web_search": "0" + "prompt": "0.00000055" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -47768,21 +49419,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "meta-llama/Llama-Guard-4-12B", + "provider_model_id": "moonshotai/Kimi-K2-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/fp4", + "quantization": "fp4", "supported_parameters": [ "max_tokens", "temperature", @@ -47794,45 +49450,57 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-4-12B", + "hf_slug": "moonshotai/Kimi-K2-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama Guard 4 12B", + "name": "MoonshotAI: Kimi K2 0711", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-4-12b", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama Guard 4 12B", - "slug": "meta-llama/llama-guard-4-12b", + "short_name": "Kimi K2 0711", + "slug": "moonshotai/kimi-k2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "microsoft", - "context_length": 16384, - "created_at": "2025-01-10T06:17:52.16346+00:00", + "author": "moonshotai", + "context_length": 131072, + "created_at": "2025-09-04T21:25:47.673205+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 16384, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -47850,7 +49518,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8e48942d-17fc-4d00-a234-4723034fd971", + "id": "2757a76d-cb1a-4e1b-a8e2-9bb374514d82", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -47859,49 +49527,55 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, - "max_prompt_tokens": 4096, + "max_completion_tokens": null, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "microsoft", - "context_length": 16384, - "created_at": "2025-01-10T06:17:52.16346+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", - "features": {}, + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "microsoft/phi-4", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Microsoft: Phi 4", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "microsoft/phi-4", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-0905", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Phi 4", - "slug": "microsoft/phi-4", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "microsoft/phi-4", - "model_variant_slug": "microsoft/phi-4", + "model_variant_permaslug": "moonshotai/kimi-k2-0905", + "model_variant_slug": "moonshotai/kimi-k2-0905", "moderation_required": false, - "name": "DeepInfra | microsoft/phi-4", + "name": "DeepInfra | moonshotai/kimi-k2-0905", "pricing": { - "completion": "0.00000014", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000007", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.0000004" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -47986,21 +49660,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "microsoft/phi-4", + "provider_model_id": "moonshotai/Kimi-K2-Instruct-0905", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/fp4", + "quantization": "fp4", "supported_parameters": [ "max_tokens", "temperature", @@ -48012,41 +49691,57 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "microsoft/phi-4", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Microsoft: Phi 4", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "microsoft/phi-4", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-0905", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Phi 4", - "slug": "microsoft/phi-4", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "microsoft", + "author": "moonshotai", "context_length": 131072, - "created_at": "2025-03-08T01:11:24.652063+00:00", - "default_parameters": {}, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Phi-4 Multimodal Instruct is a versatile 5.6B parameter foundation model that combines advanced reasoning and instruction-following capabilities across both text and visual inputs, providing accurate text outputs. The unified architecture enables efficient, low-latency inference, suitable for edge and mobile deployments. Phi-4 Multimodal Instruct supports text inputs in multiple languages including Arabic, Chinese, English, French, German, Japanese, Spanish, and more, with visual input optimized primarily for English. It delivers impressive performance on multimodal tasks involving mathematical, scientific, and document reasoning, providing developers and enterprises a powerful yet compact model for sophisticated interactive applications. For more information, see the [Phi-4 Multimodal blog post](https://azure.microsoft.com/en-us/blog/empowering-innovation-the-next-generation-of-the-phi-family/).\n", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, @@ -48059,16 +49754,18 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "345f416d-3f33-4530-8033-614ac900a8fd", + "id": "5458704d-3189-4f87-b791-d6e089aeffa9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -48079,47 +49776,58 @@ "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, - "max_tokens_per_image": 3537, + "max_tokens_per_image": null, "model": { - "author": "microsoft", - "context_length": 131072, - "created_at": "2025-03-08T01:11:24.652063+00:00", - "default_parameters": {}, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Phi-4 Multimodal Instruct is a versatile 5.6B parameter foundation model that combines advanced reasoning and instruction-following capabilities across both text and visual inputs, providing accurate text outputs. The unified architecture enables efficient, low-latency inference, suitable for edge and mobile deployments. Phi-4 Multimodal Instruct supports text inputs in multiple languages including Arabic, Chinese, English, French, German, Japanese, Spanish, and more, with visual input optimized primarily for English. It delivers impressive performance on multimodal tasks involving mathematical, scientific, and document reasoning, providing developers and enterprises a powerful yet compact model for sophisticated interactive applications. For more information, see the [Phi-4 Multimodal blog post](https://azure.microsoft.com/en-us/blog/empowering-innovation-the-next-generation-of-the-phi-family/).\n", - "features": {}, + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "microsoft/Phi-4-multimodal-instruct", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Microsoft: Phi 4 Multimodal Instruct", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "microsoft/phi-4-multimodal-instruct", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Phi 4 Multimodal Instruct", - "slug": "microsoft/phi-4-multimodal-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "microsoft/phi-4-multimodal-instruct", - "model_variant_slug": "microsoft/phi-4-multimodal-instruct", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "DeepInfra | microsoft/phi-4-multimodal-instruct", + "name": "DeepInfra | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.0000001", + "completion": "0.000002", "discount": 0, - "image": "0.00017685", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000141", + "prompt": "0.00000047" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -48204,22 +49912,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "microsoft/Phi-4-multimodal-instruct", + "provider_model_id": "moonshotai/Kimi-K2-Thinking", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/fp4", + "quantization": "fp4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -48230,45 +49945,62 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "microsoft/Phi-4-multimodal-instruct", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Microsoft: Phi 4 Multimodal Instruct", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "microsoft/phi-4-multimodal-instruct", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Phi 4 Multimodal Instruct", - "slug": "microsoft/phi-4-multimodal-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "microsoft", - "context_length": 32768, - "created_at": "2025-05-01T20:22:41.235613+00:00", - "default_parameters": {}, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Phi-4-reasoning-plus is an enhanced 14B parameter model from Microsoft, fine-tuned from Phi-4 with additional reinforcement learning to boost accuracy on math, science, and code reasoning tasks. It uses the same dense decoder-only transformer architecture as Phi-4, but generates longer, more comprehensive outputs structured into a step-by-step reasoning trace and final answer.\n\nWhile it offers improved benchmark scores over Phi-4-reasoning across tasks like AIME, OmniMath, and HumanEvalPlus, its responses are typically ~50% longer, resulting in higher latency. Designed for English-only applications, it is well-suited for structured reasoning workflows where output quality takes priority over response speed.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -48277,17 +50009,18 @@ "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", + "supports_multipart": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "88e7bba7-808f-4eb7-8964-040e99d53e72", + "id": "a1659736-3cfc-47d9-b48f-303c93c57b11", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -48300,53 +50033,54 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "microsoft", - "context_length": 32768, - "created_at": "2025-05-01T20:22:41.235613+00:00", - "default_parameters": {}, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Phi-4-reasoning-plus is an enhanced 14B parameter model from Microsoft, fine-tuned from Phi-4 with additional reinforcement learning to boost accuracy on math, science, and code reasoning tasks. It uses the same dense decoder-only transformer architecture as Phi-4, but generates longer, more comprehensive outputs structured into a step-by-step reasoning trace and final answer.\n\nWhile it offers improved benchmark scores over Phi-4-reasoning across tasks like AIME, OmniMath, and HumanEvalPlus, its responses are typically ~50% longer, resulting in higher latency. Designed for English-only applications, it is well-suited for structured reasoning workflows where output quality takes priority over response speed.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "microsoft/Phi-4-reasoning-plus", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Microsoft: Phi 4 Reasoning Plus", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "microsoft/phi-4-reasoning-plus-04-30", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Phi 4 Reasoning Plus", - "slug": "microsoft/phi-4-reasoning-plus", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "microsoft/phi-4-reasoning-plus-04-30", - "model_variant_slug": "microsoft/phi-4-reasoning-plus", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "DeepInfra | microsoft/phi-4-reasoning-plus-04-30", + "name": "DeepInfra | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.00000035", + "completion": "0.00000225", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000007", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000070000002", + "prompt": "0.00000045" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -48431,21 +50165,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "microsoft/phi-4-reasoning-plus", + "provider_model_id": "moonshotai/Kimi-K2.5", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", @@ -48459,57 +50198,56 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "microsoft/Phi-4-reasoning-plus", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Microsoft: Phi 4 Reasoning Plus", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "microsoft/phi-4-reasoning-plus-04-30", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Phi 4 Reasoning Plus", - "slug": "microsoft/phi-4-reasoning-plus", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { - "author": "minimax", - "context_length": 262144, - "created_at": "2025-10-23T20:41:33.120854+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, - "default_stops": [], + "author": "gryphe", + "context_length": 4096, + "created_at": "2023-07-02T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["###", ""], "default_system": null, - "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 4096, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -48518,8 +50256,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "content-string", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -48529,7 +50265,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cc6153b5-bc90-49ac-afb4-450d1018ebe7", + "id": "ffd94635-42cb-47e4-988a-b905c2e7fa57", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -48538,67 +50274,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-10-23T20:41:33.120854+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, - "default_stops": [], + "author": "gryphe", + "context_length": 4096, + "created_at": "2023-07-02T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["###", ""], "default_system": null, - "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2", + "hf_slug": "Gryphe/MythoMax-L2-13b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "MiniMax: MiniMax M2", + "name": "MythoMax 13B", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "gryphe/mythomax-l2-13b", + "reasoning_config": null, "router": null, - "short_name": "MiniMax M2", - "slug": "minimax/minimax-m2", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "MythoMax 13B", + "slug": "gryphe/mythomax-l2-13b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m2", - "model_variant_slug": "minimax/minimax-m2", + "model_variant_permaslug": "gryphe/mythomax-l2-13b", + "model_variant_slug": "gryphe/mythomax-l2-13b", "moderation_required": false, - "name": "DeepInfra | minimax/minimax-m2", + "name": "DeepInfra | gryphe/mythomax-l2-13b", "pricing": { - "completion": "0.00000102", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000127", - "internal_reasoning": "0", - "prompt": "0.000000254", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -48683,24 +50396,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "MiniMaxAI/MiniMax-M2", + "provider_model_id": "Gryphe/MythoMax-L2-13b", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/fp16", + "quantization": "fp16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -48711,64 +50427,45 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2", + "hf_slug": "Gryphe/MythoMax-L2-13b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "MiniMax: MiniMax M2", + "name": "MythoMax 13B", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "gryphe/mythomax-l2-13b", + "reasoning_config": null, "router": null, - "short_name": "MiniMax M2", - "slug": "minimax/minimax-m2", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "MythoMax 13B", + "slug": "gryphe/mythomax-l2-13b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "minimax", - "context_length": 196608, - "created_at": "2025-12-23T01:56:37+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.9 - }, - "default_stops": [], - "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", - "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "author": "nousresearch", + "context_length": 131072, + "created_at": "2024-08-16T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_system": null, + "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 196608, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -48777,8 +50474,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -48788,7 +50483,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "64da1c5c-a186-4b10-8ee6-32622dfb26ca", + "id": "3280e8f8-0960-400c-8284-2e03a0bcb446", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -48797,67 +50492,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-12-23T01:56:37+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.9 - }, - "default_stops": [], - "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", - "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "author": "nousresearch", + "context_length": 131072, + "created_at": "2024-08-16T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_system": null, + "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2.1", + "hf_slug": "NousResearch/Hermes-3-Llama-3.1-405B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "MiniMax: MiniMax M2.1", + "name": "Nous: Hermes 3 405B Instruct", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2.1", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "nousresearch/hermes-3-llama-3.1-405b", + "reasoning_config": null, "router": null, - "short_name": "MiniMax M2.1", - "slug": "minimax/minimax-m2.1", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Hermes 3 405B Instruct", + "slug": "nousresearch/hermes-3-llama-3.1-405b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m2.1", - "model_variant_slug": "minimax/minimax-m2.1", + "model_variant_permaslug": "nousresearch/hermes-3-llama-3.1-405b", + "model_variant_slug": "nousresearch/hermes-3-llama-3.1-405b", "moderation_required": false, - "name": "DeepInfra | minimax/minimax-m2.1", + "name": "DeepInfra | nousresearch/hermes-3-llama-3.1-405b", "pricing": { - "completion": "0.0000012", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000014", - "internal_reasoning": "0", - "prompt": "0.00000028", - "request": "0", - "web_search": "0" + "prompt": "0.000001" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -48942,24 +50614,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "MiniMaxAI/MiniMax-M2.1", + "provider_model_id": "NousResearch/Hermes-3-Llama-3.1-405B", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -48970,62 +50645,45 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2.1", + "hf_slug": "NousResearch/Hermes-3-Llama-3.1-405B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "MiniMax: MiniMax M2.1", + "name": "Nous: Hermes 3 405B Instruct", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2.1", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "nousresearch/hermes-3-llama-3.1-405b", + "reasoning_config": null, "router": null, - "short_name": "MiniMax M2.1", - "slug": "minimax/minimax-m2.1", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Hermes 3 405B Instruct", + "slug": "nousresearch/hermes-3-llama-3.1-405b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 128000, - "created_at": "2025-07-10T15:19:11.726916+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": [], + "author": "nousresearch", + "context_length": 131072, + "created_at": "2024-08-18T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and released under the Apache 2.0 license, it features a 128k token context window and supports both Mistral-style function calling and XML output formats.\n\nDesigned for agentic coding workflows, Devstral Small 1.1 is optimized for tasks such as codebase exploration, multi-file edits, and integration into autonomous development agents like OpenHands and Cline. It achieves 53.6% on SWE-Bench Verified, surpassing all other open models on this benchmark, while remaining lightweight enough to run on a single 4090 GPU or Apple silicon machine. The model uses a Tekken tokenizer with a 131k vocabulary and is deployable via vLLM, Transformers, Ollama, LM Studio, and other OpenAI-compatible runtimes.\n", + "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -49043,7 +50701,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "13dc8678-85f8-499a-acf2-e9ecdefada17", + "id": "2151651b-f4e9-4db7-b20d-8b91fb524bf8", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -49056,57 +50714,40 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", + "author": "nousresearch", "context_length": 131072, - "created_at": "2025-07-10T15:19:11.726916+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": [], + "created_at": "2024-08-18T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and released under the Apache 2.0 license, it features a 128k token context window and supports both Mistral-style function calling and XML output formats.\n\nDesigned for agentic coding workflows, Devstral Small 1.1 is optimized for tasks such as codebase exploration, multi-file edits, and integration into autonomous development agents like OpenHands and Cline. It achieves 53.6% on SWE-Bench Verified, surpassing all other open models on this benchmark, while remaining lightweight enough to run on a single 4090 GPU or Apple silicon machine. The model uses a Tekken tokenizer with a 131k vocabulary and is deployable via vLLM, Transformers, Ollama, LM Studio, and other OpenAI-compatible runtimes.\n", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Mistral", + "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "mistralai/Devstral-Small-2507", + "hf_slug": "NousResearch/Hermes-3-Llama-3.1-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Mistral: Devstral Small 1.1", + "name": "Nous: Hermes 3 70B Instruct", "output_modalities": ["text"], - "permaslug": "mistralai/devstral-small-2507", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "nousresearch/hermes-3-llama-3.1-70b", + "reasoning_config": null, "router": null, - "short_name": "Devstral Small 1.1", - "slug": "mistralai/devstral-small", + "short_name": "Hermes 3 70B Instruct", + "slug": "nousresearch/hermes-3-llama-3.1-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/devstral-small-2507", - "model_variant_slug": "mistralai/devstral-small", + "model_variant_permaslug": "nousresearch/hermes-3-llama-3.1-70b", + "model_variant_slug": "nousresearch/hermes-3-llama-3.1-70b", "moderation_required": false, - "name": "DeepInfra | mistralai/devstral-small-2507", + "name": "DeepInfra | nousresearch/hermes-3-llama-3.1-70b", "pricing": { - "completion": "0.00000028", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000007", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -49191,17 +50832,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "mistralai/Devstral-Small-2507", + "provider_model_id": "NousResearch/Hermes-3-Llama-3.1-70B", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", @@ -49225,49 +50871,37 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Mistral", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "mistralai/Devstral-Small-2507", + "hf_slug": "NousResearch/Hermes-3-Llama-3.1-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Mistral: Devstral Small 1.1", + "name": "Nous: Hermes 3 70B Instruct", "output_modalities": ["text"], - "permaslug": "mistralai/devstral-small-2507", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "nousresearch/hermes-3-llama-3.1-70b", + "reasoning_config": null, "router": null, - "short_name": "Devstral Small 1.1", - "slug": "mistralai/devstral-small", + "short_name": "Hermes 3 70B Instruct", + "slug": "nousresearch/hermes-3-llama-3.1-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 128000, - "created_at": "2025-05-21T14:22:59.930775+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": [], + "author": "nvidia", + "context_length": 131072, + "created_at": "2024-10-15T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.", + "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -49279,13 +50913,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "9d9d50d5-6b2f-40c8-9e99-9e9f9e6b7e7a", + "id": "d2a33d30-5d41-47d3-a816-1cc067b5a7dd", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -49294,61 +50928,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", + "author": "nvidia", "context_length": 131072, - "created_at": "2025-05-21T14:22:59.930775+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": [], + "created_at": "2024-10-15T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Devstral-Small-2505 is a 24B parameter agentic LLM fine-tuned from Mistral-Small-3.1, jointly developed by Mistral AI and All Hands AI for advanced software engineering tasks. It is optimized for codebase exploration, multi-file editing, and integration into coding agents, achieving state-of-the-art results on SWE-Bench Verified (46.8%).\n\nDevstral supports a 128k context window and uses a custom Tekken tokenizer. It is text-only, with the vision encoder removed, and is suitable for local deployment on high-end consumer hardware (e.g., RTX 4090, 32GB RAM Macs). Devstral is best used in agentic workflows via the OpenHands scaffold and is compatible with inference frameworks like vLLM, Transformers, and Ollama. It is released under the Apache 2.0 license.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Mistral", + "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "mistralai/Devstral-Small-2505", + "hf_slug": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Mistral: Devstral Small 2505", + "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", "output_modalities": ["text"], - "permaslug": "mistralai/devstral-small-2505", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "nvidia/llama-3.1-nemotron-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Devstral Small 2505", - "slug": "mistralai/devstral-small-2505", + "short_name": "Llama 3.1 Nemotron 70B Instruct", + "slug": "nvidia/llama-3.1-nemotron-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/devstral-small-2505", - "model_variant_slug": "mistralai/devstral-small-2505", + "model_variant_permaslug": "nvidia/llama-3.1-nemotron-70b-instruct", + "model_variant_slug": "nvidia/llama-3.1-nemotron-70b-instruct", "moderation_required": false, - "name": "DeepInfra | mistralai/devstral-small-2505", + "name": "DeepInfra | nvidia/llama-3.1-nemotron-70b-instruct", "pricing": { - "completion": "0.00000012", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000012" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -49433,21 +51050,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "mistralai/Devstral-Small-2505", + "provider_model_id": "nvidia/Llama-3.1-Nemotron-70B-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -49459,57 +51081,47 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Mistral", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "mistralai/Devstral-Small-2505", + "hf_slug": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Mistral: Devstral Small 2505", + "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", "output_modalities": ["text"], - "permaslug": "mistralai/devstral-small-2505", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "nvidia/llama-3.1-nemotron-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Devstral Small 2505", - "slug": "mistralai/devstral-small-2505", + "short_name": "Llama 3.1 Nemotron 70B Instruct", + "slug": "nvidia/llama-3.1-nemotron-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2024-05-27T00:00:00+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": ["[INST]", ""], + "author": "nvidia", + "context_length": 131072, + "created_at": "2025-10-10T13:03:15.135862+00:00", + "default_parameters": null, + "default_stops": [], "default_system": null, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", + "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and multi-turn chat, followed by multiple RL stages; Reward-aware Preference Optimization (RPO) for alignment, RL with Verifiable Rewards (RLVR) for step-wise reasoning, and iterative DPO to refine tool-use behavior. A distillation-driven Neural Architecture Search (“Puzzle”) replaces some attention blocks and varies FFN widths to shrink memory footprint and improve throughput, enabling single-GPU (H100/H200) deployment while preserving instruction following and CoT quality.\n\nIn internal evaluations (NeMo-Skills, up to 16 runs, temp = 0.6, top_p = 0.95), the model reports strong reasoning/coding results, e.g., MATH500 pass@1 = 97.4, AIME-2024 = 87.5, AIME-2025 = 82.71, GPQA = 71.97, LiveCodeBench (24.10–25.02) = 73.58, and MMLU-Pro (CoT) = 79.53. The model targets practical inference efficiency (high tokens/s, reduced VRAM) with Transformers/vLLM support and explicit “reasoning on/off” modes (chat-first defaults, greedy recommended when disabled). Suitable for building agents, assistants, and long-context retrieval systems where balanced accuracy-to-cost and reliable tool use matter.\n", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -49518,16 +51130,20 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "2efa8e2d-042b-4c64-8a87-0af44d25235d", + "id": "89ac71e7-25fc-49d4-a263-7860d13b8f5e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -49536,51 +51152,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32768, - "created_at": "2024-05-27T00:00:00+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": ["[INST]", ""], + "author": "nvidia", + "context_length": 131072, + "created_at": "2025-10-10T13:03:15.135862+00:00", + "default_parameters": null, + "default_stops": [], "default_system": null, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", - "features": {}, - "group": "Mistral", + "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and multi-turn chat, followed by multiple RL stages; Reward-aware Preference Optimization (RPO) for alignment, RL with Verifiable Rewards (RLVR) for step-wise reasoning, and iterative DPO to refine tool-use behavior. A distillation-driven Neural Architecture Search (“Puzzle”) replaces some attention blocks and varies FFN widths to shrink memory footprint and improve throughput, enabling single-GPU (H100/H200) deployment while preserving instruction following and CoT quality.\n\nIn internal evaluations (NeMo-Skills, up to 16 runs, temp = 0.6, top_p = 0.95), the model reports strong reasoning/coding results, e.g., MATH500 pass@1 = 97.4, AIME-2024 = 87.5, AIME-2025 = 82.71, GPQA = 71.97, LiveCodeBench (24.10–25.02) = 73.58, and MMLU-Pro (CoT) = 79.53. The model targets practical inference efficiency (high tokens/s, reduced VRAM) with Transformers/vLLM support and explicit “reasoning on/off” modes (chat-first defaults, greedy recommended when disabled). Suitable for building agents, assistants, and long-context retrieval systems where balanced accuracy-to-cost and reliable tool use matter.\n", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Llama3", "has_text_output": true, - "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", + "hf_slug": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", - "name": "Mistral: Mistral 7B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-7b-instruct", - "reasoning_config": null, + "permaslug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral 7B Instruct", - "slug": "mistralai/mistral-7b-instruct", + "short_name": "Llama 3.3 Nemotron Super 49B V1.5", + "slug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-7b-instruct", - "model_variant_slug": "mistralai/mistral-7b-instruct", + "model_variant_permaslug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "model_variant_slug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", "moderation_required": false, - "name": "DeepInfra | mistralai/mistral-7b-instruct", + "name": "DeepInfra | nvidia/llama-3.3-nemotron-super-49b-v1.5", "pricing": { - "completion": "0.000000054", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000028", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -49665,22 +51284,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "mistralai/Mistral-7B-Instruct-v0.3", + "provider_model_id": "nvidia/Llama-3.3-Nemotron-Super-49B-v1.5", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -49696,44 +51322,56 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Llama3", "has_text_output": true, - "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", + "hf_slug": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", - "name": "Mistral: Mistral 7B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-7b-instruct", - "reasoning_config": null, + "permaslug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral 7B Instruct", - "slug": "mistralai/mistral-7b-instruct", + "short_name": "Llama 3.3 Nemotron Super 49B V1.5", + "slug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 131072, - "created_at": "2024-07-19T00:00:00+00:00", + "author": "nvidia", + "context_length": 262144, + "created_at": "2025-12-14T16:54:35+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", + "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. Do not use for production or business-critical systems.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -49742,6 +51380,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -49751,7 +51390,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3e0aa374-de51-4cd0-911b-29ae65067e21", + "id": "2bd3912d-8b35-4818-b873-24ccfefa6cf9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -49760,51 +51399,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 131072, - "created_at": "2024-07-19T00:00:00+00:00", + "author": "nvidia", + "context_length": 256000, + "created_at": "2025-12-14T16:54:35+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", - "features": {}, - "group": "Mistral", + "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. Do not use for production or business-critical systems.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", + "hf_slug": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Nemo", + "name": "NVIDIA: Nemotron 3 Nano 30B A3B", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-nemo", - "reasoning_config": null, + "permaslug": "nvidia/nemotron-3-nano-30b-a3b", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Nemo", - "slug": "mistralai/mistral-nemo", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Nemotron 3 Nano 30B A3B", + "slug": "nvidia/nemotron-3-nano-30b-a3b", + "updated_at": "2025-12-16T17:44:22.146099+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-nemo", - "model_variant_slug": "mistralai/mistral-nemo", + "model_variant_permaslug": "nvidia/nemotron-3-nano-30b-a3b", + "model_variant_slug": "nvidia/nemotron-3-nano-30b-a3b", "moderation_required": false, - "name": "DeepInfra | mistralai/mistral-nemo", + "name": "DeepInfra | nvidia/nemotron-3-nano-30b-a3b", "pricing": { - "completion": "0.00000004", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -49889,22 +51536,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "mistralai/Mistral-Nemo-Instruct-2407", + "provider_model_id": "nvidia/Nemotron-3-Nano-30B-A3B", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/fp4", + "quantization": "fp4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -49915,49 +51569,62 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tool_choice", + "tools" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", + "hf_slug": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Nemo", + "name": "NVIDIA: Nemotron 3 Nano 30B A3B", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-nemo", - "reasoning_config": null, + "permaslug": "nvidia/nemotron-3-nano-30b-a3b", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Nemo", - "slug": "mistralai/mistral-nemo", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Nemotron 3 Nano 30B A3B", + "slug": "nvidia/nemotron-3-nano-30b-a3b", + "updated_at": "2025-12-16T17:44:22.146099+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2025-01-30T16:43:29.33592+00:00", + "author": "nvidia", + "context_length": 131072, + "created_at": "2025-10-28T18:19:25.723503+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.3, + "temperature": null, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", + "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -49975,7 +51642,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "a911739a-dc2e-424d-a221-5c776f1552f0", + "id": "da29268d-2dd6-4eb5-bb11-1f5a1f86b91a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -49984,64 +51651,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32768, - "created_at": "2025-01-30T16:43:29.33592+00:00", + "author": "nvidia", + "context_length": 128000, + "created_at": "2025-10-28T18:19:25.723503+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.3, + "temperature": null, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", + "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Mistral", + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3", + "name": "NVIDIA: Nemotron Nano 12B 2 VL", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "permaslug": "nvidia/nemotron-nano-12b-v2-vl", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Mistral Small 3", - "slug": "mistralai/mistral-small-24b-instruct-2501", - "updated_at": "2025-12-16T18:22:59.07006+00:00", + "short_name": "Nemotron Nano 12B 2 VL", + "slug": "nvidia/nemotron-nano-12b-v2-vl", + "updated_at": "2025-11-12T02:19:07.557675+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-small-24b-instruct-2501", - "model_variant_slug": "mistralai/mistral-small-24b-instruct-2501", + "model_variant_permaslug": "nvidia/nemotron-nano-12b-v2-vl", + "model_variant_slug": "nvidia/nemotron-nano-12b-v2-vl", "moderation_required": false, - "name": "DeepInfra | mistralai/mistral-small-24b-instruct-2501", + "name": "DeepInfra | nvidia/nemotron-nano-12b-v2-vl", "pricing": { - "completion": "0.00000008", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -50126,22 +51788,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "mistralai/Mistral-Small-24B-Instruct-2501", + "provider_model_id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -50155,7 +51824,7 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" @@ -50163,47 +51832,45 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Mistral", + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3", + "name": "NVIDIA: Nemotron Nano 12B 2 VL", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "permaslug": "nvidia/nemotron-nano-12b-v2-vl", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Mistral Small 3", - "slug": "mistralai/mistral-small-24b-instruct-2501", - "updated_at": "2025-12-16T18:22:59.07006+00:00", + "short_name": "Nemotron Nano 12B 2 VL", + "slug": "nvidia/nemotron-nano-12b-v2-vl", + "updated_at": "2025-11-12T02:19:07.557675+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 128000, - "created_at": "2025-06-20T18:10:16.960494+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "nvidia", + "context_length": 131072, + "created_at": "2025-09-05T21:13:27.486887+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -50215,13 +51882,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "0ebc91cd-74bd-44ab-9c9a-c1f17d19f128", + "id": "e1890582-9493-47ac-b31f-c9da070ff230", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -50234,47 +51901,50 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 128000, - "created_at": "2025-06-20T18:10:16.960494+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "nvidia", + "context_length": 32000, + "created_at": "2025-09-05T21:13:27.486887+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", - "features": {}, - "group": "Mistral", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.2 24B", + "name": "NVIDIA: Nemotron Nano 9B V2", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "reasoning_config": null, + "permaslug": "nvidia/nemotron-nano-9b-v2", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Small 3.2 24B", - "slug": "mistralai/mistral-small-3.2-24b-instruct", + "short_name": "Nemotron Nano 9B V2", + "slug": "nvidia/nemotron-nano-9b-v2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "model_variant_slug": "mistralai/mistral-small-3.2-24b-instruct", + "model_variant_permaslug": "nvidia/nemotron-nano-9b-v2", + "model_variant_slug": "nvidia/nemotron-nano-9b-v2", "moderation_required": false, - "name": "DeepInfra | mistralai/mistral-small-3.2-24b-instruct-2506", + "name": "DeepInfra | nvidia/nemotron-nano-9b-v2", "pricing": { - "completion": "0.0000002", + "completion": "0.00000016", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000075", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -50359,22 +52029,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "provider_model_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -50385,47 +52062,61 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.2 24B", + "name": "NVIDIA: Nemotron Nano 9B V2", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "reasoning_config": null, + "permaslug": "nvidia/nemotron-nano-9b-v2", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Small 3.2 24B", - "slug": "mistralai/mistral-small-3.2-24b-instruct", + "short_name": "Nemotron Nano 9B V2", + "slug": "nvidia/nemotron-nano-9b-v2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2023-12-10T00:00:00+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -50437,13 +52128,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "a840d695-b1b6-411b-8876-4a6d51218c1e", + "id": "b58ea971-6c98-4488-a180-cfb19e626172", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -50452,51 +52143,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32768, - "created_at": "2023-12-10T00:00:00+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", - "features": {}, - "group": "Mistral", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mixtral 8x7B Instruct", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "mistralai/mixtral-8x7b-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-120b", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mixtral 8x7B Instruct", - "slug": "mistralai/mixtral-8x7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mixtral-8x7b-instruct", - "model_variant_slug": "mistralai/mixtral-8x7b-instruct", + "model_variant_permaslug": "openai/gpt-oss-120b:exacto", + "model_variant_slug": "openai/gpt-oss-120b:exacto", "moderation_required": false, - "name": "DeepInfra | mistralai/mixtral-8x7b-instruct", + "name": "DeepInfra | openai/gpt-oss-120b:exacto", "pricing": { - "completion": "0.00000054", + "completion": "0.00000019", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000054", - "request": "0", - "web_search": "0" + "prompt": "0.000000039" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -50581,22 +52280,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "provider_model_id": "openai/gpt-oss-120b", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -50612,42 +52318,57 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "exacto" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mixtral 8x7B Instruct", + "name": "OpenAI: gpt-oss-120b (exacto)", "output_modalities": ["text"], - "permaslug": "mistralai/mixtral-8x7b-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-120b", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mixtral 8x7B Instruct", - "slug": "mistralai/mixtral-8x7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b (exacto)", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 131000, - "created_at": "2025-07-11T19:47:32.565514+00:00", - "default_parameters": {}, + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -50656,17 +52377,20 @@ "training": false }, "features": { - "supports_multipart": false, + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "810046fa-c026-4ccc-b1ef-dfc98be9b76f", + "id": "bfc33027-e2bd-4d16-9ee2-cee1913475f0", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -50679,55 +52403,57 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", + "author": "openai", "context_length": 131072, - "created_at": "2025-07-11T19:47:32.565514+00:00", - "default_parameters": {}, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "GPT", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0711", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0711", - "slug": "moonshotai/kimi-k2", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2", - "model_variant_slug": "moonshotai/kimi-k2", + "model_variant_permaslug": "openai/gpt-oss-20b", + "model_variant_slug": "openai/gpt-oss-20b", "moderation_required": false, - "name": "DeepInfra | moonshotai/kimi-k2", + "name": "DeepInfra | openai/gpt-oss-20b", "pricing": { - "completion": "0.0000022", + "completion": "0.00000014", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000055", - "request": "0", - "web_search": "0" + "prompt": "0.00000003" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -50812,22 +52538,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "moonshotai/Kimi-K2-Instruct", + "provider_model_id": "openai/gpt-oss-20b", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp4", - "quantization": "fp4", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -50842,53 +52575,56 @@ "tools", "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": false, + "supports_multipart": true, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "GPT", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0711", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0711", - "slug": "moonshotai/kimi-k2", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 131072, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "author": "qwen", + "context_length": 128000, + "created_at": "2025-03-24T18:10:38.542849+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -50897,16 +52633,20 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "2757a76d-cb1a-4e1b-a8e2-9bb374514d82", + "id": "af2c4f7c-a833-4c69-958d-7996a5a0ff8a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -50919,56 +52659,40 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "author": "qwen", + "context_length": 32768, + "created_at": "2025-03-24T18:10:38.542849+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen2.5-VL-32B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen2.5 VL 32B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-32b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen2.5 VL 32B Instruct", + "slug": "qwen/qwen2.5-vl-32b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "qwen/qwen2.5-vl-32b-instruct", + "model_variant_slug": "qwen/qwen2.5-vl-32b-instruct", "moderation_required": false, - "name": "DeepInfra | moonshotai/kimi-k2-0905", + "name": "DeepInfra | qwen/qwen2.5-vl-32b-instruct", "pricing": { - "completion": "0.000002", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000032", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -51053,21 +52777,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "moonshotai/Kimi-K2-Instruct-0905", + "provider_model_id": "Qwen/Qwen2.5-VL-32B-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp4", - "quantization": "fp4", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", @@ -51079,61 +52808,45 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen2.5-VL-32B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen2.5 VL 32B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-32b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen2.5 VL 32B Instruct", + "slug": "qwen/qwen2.5-vl-32b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 131072, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 40960, + "created_at": "2025-04-28T21:41:18.320017+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, programming, and logical inference, and a \"non-thinking\" mode for general-purpose conversation. The model is fine-tuned for instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 40960, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -51142,18 +52855,20 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": { + "response_format": true, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, - "literal_required": true, - "type_function": true + "literal_none": true, + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "5458704d-3189-4f87-b791-d6e089aeffa9", + "id": "778bdebf-d018-4a24-a34e-230fce0f2045", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -51162,65 +52877,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 40960, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 131702, + "created_at": "2025-04-28T21:41:18.320017+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, programming, and logical inference, and a \"non-thinking\" mode for general-purpose conversation. The model is fine-tuned for instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "Qwen/Qwen3-14B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "Qwen: Qwen3 14B", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "qwen/qwen3-14b-04-28", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 14B", + "slug": "qwen/qwen3-14b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "qwen/qwen3-14b-04-28", + "model_variant_slug": "qwen/qwen3-14b", "moderation_required": false, - "name": "DeepInfra | moonshotai/kimi-k2-thinking-20251106", + "name": "DeepInfra | qwen/qwen3-14b-04-28", "pricing": { - "completion": "0.000002", + "completion": "0.00000024", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000141", - "internal_reasoning": "0", - "prompt": "0.00000047", - "request": "0", - "web_search": "0" + "prompt": "0.00000012" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -51305,24 +53007,30 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "moonshotai/Kimi-K2-Thinking", + "provider_model_id": "Qwen/Qwen3-14B", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp4", - "quantization": "fp4", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", + "response_format", "max_tokens", "temperature", "top_p", @@ -51333,7 +53041,6 @@ "top_k", "seed", "min_p", - "response_format", "tools", "tool_choice" ], @@ -51344,47 +53051,44 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "Qwen/Qwen3-14B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "Qwen: Qwen3 14B", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "qwen/qwen3-14b-04-28", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 14B", + "slug": "qwen/qwen3-14b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "gryphe", - "context_length": 4096, - "created_at": "2023-07-02T00:00:00+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, - "default_stops": ["###", ""], + "default_stops": [], "default_system": null, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 4096, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -51396,64 +53100,69 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "ffd94635-42cb-47e4-988a-b905c2e7fa57", + "id": "3e3e2284-a12c-46b5-b3db-bc4eb148c7d9", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 250, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "gryphe", - "context_length": 4096, - "created_at": "2023-07-02T00:00:00+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, - "default_stops": ["###", ""], + "default_stops": [], "default_system": null, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", - "features": {}, - "group": "Llama2", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Gryphe/MythoMax-L2-13b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "MythoMax 13B", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "gryphe/mythomax-l2-13b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "MythoMax 13B", - "slug": "gryphe/mythomax-l2-13b", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "gryphe/mythomax-l2-13b", - "model_variant_slug": "gryphe/mythomax-l2-13b", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "DeepInfra | gryphe/mythomax-l2-13b", + "name": "DeepInfra | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.00000008", + "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "prompt": "0.000000071" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -51538,21 +53247,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Gryphe/MythoMax-L2-13b", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp16", - "quantization": "fp16", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -51564,45 +53278,61 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama2", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Gryphe/MythoMax-L2-13b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "MythoMax 13B", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "gryphe/mythomax-l2-13b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "MythoMax 13B", - "slug": "gryphe/mythomax-l2-13b", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "nousresearch", - "context_length": 131072, - "created_at": "2024-08-16T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-25T13:19:17.179049+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -51614,13 +53344,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "3280e8f8-0960-400c-8284-2e03a0bcb446", + "id": "e2afc20a-e73b-4af1-b934-bdfe34fe6c77", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -51629,49 +53359,60 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nousresearch", - "context_length": 131072, - "created_at": "2024-08-16T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-25T13:19:17.179049+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.", - "features": {}, - "group": "Llama3", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-3-Llama-3.1-405B", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Nous: Hermes 3 405B Instruct", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-3-llama-3.1-405b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Hermes 3 405B Instruct", - "slug": "nousresearch/hermes-3-llama-3.1-405b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, - "model_variant_permaslug": "nousresearch/hermes-3-llama-3.1-405b", - "model_variant_slug": "nousresearch/hermes-3-llama-3.1-405b", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", "moderation_required": false, - "name": "DeepInfra | nousresearch/hermes-3-llama-3.1-405b", + "name": "DeepInfra | qwen/qwen3-235b-a22b-thinking-2507", "pricing": { - "completion": "0.000001", + "completion": "0.0000023", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000002000000006", + "prompt": "0.00000023" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -51756,22 +53497,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "NousResearch/Hermes-3-Llama-3.1-405B", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -51785,42 +53533,57 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-3-Llama-3.1-405B", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Nous: Hermes 3 405B Instruct", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-3-llama-3.1-405b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Hermes 3 405B Instruct", - "slug": "nousresearch/hermes-3-llama-3.1-405b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, { - "author": "nousresearch", - "context_length": 131072, - "created_at": "2024-08-18T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "qwen", + "context_length": 40960, + "created_at": "2025-04-28T22:16:44.177326+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 40960, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -51829,6 +53592,10 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -51838,7 +53605,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "2151651b-f4e9-4db7-b20d-8b91fb524bf8", + "id": "efdfff77-9574-4695-8e08-32d968a43376", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -51847,49 +53614,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 40960, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nousresearch", + "author": "qwen", "context_length": 131072, - "created_at": "2024-08-18T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "created_at": "2025-04-28T22:16:44.177326+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", - "features": {}, - "group": "Llama3", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-3-Llama-3.1-70B", + "hf_slug": "Qwen/Qwen3-30B-A3B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Nous: Hermes 3 70B Instruct", + "name": "Qwen: Qwen3 30B A3B", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-3-llama-3.1-70b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-30b-a3b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Hermes 3 70B Instruct", - "slug": "nousresearch/hermes-3-llama-3.1-70b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 30B A3B", + "slug": "qwen/qwen3-30b-a3b", + "updated_at": "2026-01-08T19:57:57.475571+00:00", "warning_message": null }, - "model_variant_permaslug": "nousresearch/hermes-3-llama-3.1-70b", - "model_variant_slug": "nousresearch/hermes-3-llama-3.1-70b", + "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", + "model_variant_slug": "qwen/qwen3-30b-a3b", "moderation_required": false, - "name": "DeepInfra | nousresearch/hermes-3-llama-3.1-70b", + "name": "DeepInfra | qwen/qwen3-30b-a3b-04-28", "pricing": { - "completion": "0.0000003", + "completion": "0.00000028", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000008" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -51974,22 +53751,30 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "NousResearch/Hermes-3-Llama-3.1-70B", + "provider_model_id": "Qwen/Qwen3-30B-A3B", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", + "response_format", "max_tokens", "temperature", "top_p", @@ -52000,45 +53785,57 @@ "top_k", "seed", "min_p", - "response_format" + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-3-Llama-3.1-70B", + "hf_slug": "Qwen/Qwen3-30B-A3B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Nous: Hermes 3 70B Instruct", + "name": "Qwen: Qwen3 30B A3B", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-3-llama-3.1-70b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-30b-a3b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Hermes 3 70B Instruct", - "slug": "nousresearch/hermes-3-llama-3.1-70b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 30B A3B", + "slug": "qwen/qwen3-30b-a3b", + "updated_at": "2026-01-08T19:57:57.475571+00:00", "warning_message": null }, { - "author": "nvidia", - "context_length": 131072, - "created_at": "2024-10-15T00:00:00+00:00", + "author": "qwen", + "context_length": 40960, + "created_at": "2025-04-28T21:32:25.189881+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 40960, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -52047,6 +53844,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -52056,7 +53854,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "d2a33d30-5d41-47d3-a816-1cc067b5a7dd", + "id": "6b8c829d-3094-45e7-8139-0a67e09060c3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -52065,49 +53863,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nvidia", + "author": "qwen", "context_length": 131072, - "created_at": "2024-10-15T00:00:00+00:00", + "created_at": "2025-04-28T21:32:25.189881+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "NVIDIA's Llama 3.1 Nemotron 70B is a language model designed for generating precise and useful responses. Leveraging [Llama 3.1 70B](/models/meta-llama/llama-3.1-70b-instruct) architecture and Reinforcement Learning from Human Feedback (RLHF), it excels in automatic alignment benchmarks. This model is tailored for applications requiring high accuracy in helpfulness and response generation, suitable for diverse user queries across multiple domains.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "" + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "nvidia/llama-3.1-nemotron-70b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-32b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "" + }, "router": null, - "short_name": "Llama 3.1 Nemotron 70B Instruct", - "slug": "nvidia/llama-3.1-nemotron-70b-instruct", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nvidia/llama-3.1-nemotron-70b-instruct", - "model_variant_slug": "nvidia/llama-3.1-nemotron-70b-instruct", + "model_variant_permaslug": "qwen/qwen3-32b-04-28", + "model_variant_slug": "qwen/qwen3-32b", "moderation_required": false, - "name": "DeepInfra | nvidia/llama-3.1-nemotron-70b-instruct", + "name": "DeepInfra | qwen/qwen3-32b-04-28", "pricing": { - "completion": "0.0000012", + "completion": "0.00000028", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000012", - "request": "0", - "web_search": "0" + "prompt": "0.00000008" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -52192,22 +53993,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "nvidia/Llama-3.1-Nemotron-70B-Instruct", + "provider_model_id": "Qwen/Qwen3-32B", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -52223,42 +54031,50 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "" + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF", + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "NVIDIA: Llama 3.1 Nemotron 70B Instruct", + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "nvidia/llama-3.1-nemotron-70b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-32b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "" + }, "router": null, - "short_name": "Llama 3.1 Nemotron 70B Instruct", - "slug": "nvidia/llama-3.1-nemotron-70b-instruct", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "nvidia", - "context_length": 131072, - "created_at": "2025-10-10T13:03:15.135862+00:00", - "default_parameters": null, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-23T00:29:06+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and multi-turn chat, followed by multiple RL stages; Reward-aware Preference Optimization (RPO) for alignment, RL with Verifiable Rewards (RLVR) for step-wise reasoning, and iterative DPO to refine tool-use behavior. A distillation-driven Neural Architecture Search (“Puzzle”) replaces some attention blocks and varies FFN widths to shrink memory footprint and improve throughput, enabling single-GPU (H100/H200) deployment while preserving instruction following and CoT quality.\n\nIn internal evaluations (NeMo-Skills, up to 16 runs, temp = 0.6, top_p = 0.95), the model reports strong reasoning/coding results, e.g., MATH500 pass@1 = 97.4, AIME-2024 = 87.5, AIME-2025 = 82.71, GPQA = 71.97, LiveCodeBench (24.10–25.02) = 73.58, and MMLU-Pro (CoT) = 79.53. The model targets practical inference efficiency (high tokens/s, reduced VRAM) with Transformers/vLLM support and explicit “reasoning on/off” modes (chat-first defaults, greedy recommended when disabled). Suitable for building agents, assistants, and long-context retrieval systems where balanced accuracy-to-cost and reliable tool use matter.\n", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -52267,10 +54083,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -52280,7 +54092,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "89ac71e7-25fc-49d4-a263-7860d13b8f5e", + "id": "6982ced0-1d3c-4fd0-bc36-21297218fdbe", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -52293,57 +54105,53 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nvidia", - "context_length": 131072, - "created_at": "2025-10-10T13:03:15.135862+00:00", - "default_parameters": null, + "author": "qwen", + "context_length": 1048576, + "created_at": "2025-07-23T00:29:06+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and multi-turn chat, followed by multiple RL stages; Reward-aware Preference Optimization (RPO) for alignment, RL with Verifiable Rewards (RLVR) for step-wise reasoning, and iterative DPO to refine tool-use behavior. A distillation-driven Neural Architecture Search (“Puzzle”) replaces some attention blocks and varies FFN widths to shrink memory footprint and improve throughput, enabling single-GPU (H100/H200) deployment while preserving instruction following and CoT quality.\n\nIn internal evaluations (NeMo-Skills, up to 16 runs, temp = 0.6, top_p = 0.95), the model reports strong reasoning/coding results, e.g., MATH500 pass@1 = 97.4, AIME-2024 = 87.5, AIME-2025 = 82.71, GPQA = 71.97, LiveCodeBench (24.10–25.02) = 73.58, and MMLU-Pro (CoT) = 79.53. The model targets practical inference efficiency (high tokens/s, reduced VRAM) with Transformers/vLLM support and explicit “reasoning on/off” modes (chat-first defaults, greedy recommended when disabled). Suitable for building agents, assistants, and long-context retrieval systems where balanced accuracy-to-cost and reliable tool use matter.\n", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "features": { "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Llama3", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Llama 3.3 Nemotron Super 49B V1.5", - "slug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", - "model_variant_slug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "model_variant_slug": "qwen/qwen3-coder", "moderation_required": false, - "name": "DeepInfra | nvidia/llama-3.3-nemotron-super-49b-v1.5", + "name": "DeepInfra | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { - "completion": "0.0000004", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000022", + "prompt": "0.00000022" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "DeepInfra (Turbo)", "provider_info": { "adapterName": "DeepInfraAdapter", "baseUrl": "https://api.deepinfra.com/v1/openai", @@ -52355,7 +54163,7 @@ "termsOfServiceURL": "https://deepinfra.com/terms", "training": false }, - "displayName": "DeepInfra", + "displayName": "DeepInfra (Turbo)", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, @@ -52426,24 +54234,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra/turbo", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "nvidia/Llama-3.3-Nemotron-Super-49B-v1.5", + "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/turbo", + "quantization": "fp4", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -52459,44 +54270,44 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Llama3", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Llama 3.3 Nemotron Super 49B V1.5", - "slug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "nvidia", - "context_length": 262144, - "created_at": "2025-12-14T16:54:35+00:00", + "author": "qwen", + "context_length": 32768, + "created_at": "2025-10-28T14:48:42.679733+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -52504,11 +54315,11 @@ }, "default_stops": [], "default_system": null, - "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. Do not use for production or business-critical systems.", + "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -52517,7 +54328,7 @@ "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -52527,7 +54338,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "2bd3912d-8b35-4818-b873-24ccfefa6cf9", + "id": "0c401430-de40-460f-917e-ee8a3ce1f65f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -52540,9 +54351,9 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nvidia", - "context_length": 256000, - "created_at": "2025-12-14T16:54:35+00:00", + "author": "qwen", + "context_length": 32000, + "created_at": "2025-10-28T14:48:42.679733+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -52550,50 +54361,45 @@ }, "default_stops": [], "default_system": null, - "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. Do not use for production or business-critical systems.", + "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Other", - "has_text_output": true, - "hf_slug": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + "has_text_output": false, + "hf_slug": "Qwen/Qwen3-Embedding-4B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Nemotron 3 Nano 30B A3B", - "output_modalities": ["text"], - "permaslug": "nvidia/nemotron-3-nano-30b-a3b", + "name": "Qwen: Qwen3 Embedding 4B", + "output_modalities": ["embeddings"], + "permaslug": "qwen/qwen3-embedding-4b", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Nemotron 3 Nano 30B A3B", - "slug": "nvidia/nemotron-3-nano-30b-a3b", - "updated_at": "2025-12-16T17:44:22.146099+00:00", + "short_name": "Qwen3 Embedding 4B", + "slug": "qwen/qwen3-embedding-4b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nvidia/nemotron-3-nano-30b-a3b", - "model_variant_slug": "nvidia/nemotron-3-nano-30b-a3b", + "model_variant_permaslug": "qwen/qwen3-embedding-4b", + "model_variant_slug": "qwen/qwen3-embedding-4b", "moderation_required": false, - "name": "DeepInfra | nvidia/nemotron-3-nano-30b-a3b", + "name": "DeepInfra | qwen/qwen3-embedding-4b", "pricing": { - "completion": "0.00000024", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000002" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -52678,24 +54484,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "nvidia/Nemotron-3-Nano-30B-A3B", + "provider_model_id": "Qwen/Qwen3-Embedding-4B", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -52706,50 +54515,48 @@ "top_k", "seed", "min_p", - "response_format", - "tool_choice", - "tools" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Other", - "has_text_output": true, - "hf_slug": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + "has_text_output": false, + "hf_slug": "Qwen/Qwen3-Embedding-4B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Nemotron 3 Nano 30B A3B", - "output_modalities": ["text"], - "permaslug": "nvidia/nemotron-3-nano-30b-a3b", + "name": "Qwen: Qwen3 Embedding 4B", + "output_modalities": ["embeddings"], + "permaslug": "qwen/qwen3-embedding-4b", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Nemotron 3 Nano 30B A3B", - "slug": "nvidia/nemotron-3-nano-30b-a3b", - "updated_at": "2025-12-16T17:44:22.146099+00:00", + "short_name": "Qwen3 Embedding 4B", + "slug": "qwen/qwen3-embedding-4b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "nvidia", - "context_length": 131072, - "created_at": "2025-10-28T18:19:25.723503+00:00", + "author": "qwen", + "context_length": 32768, + "created_at": "2025-10-28T19:43:42.126124+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -52757,11 +54564,11 @@ }, "default_stops": [], "default_system": null, - "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", + "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -52770,6 +54577,7 @@ "training": false }, "features": { + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -52779,7 +54587,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "da29268d-2dd6-4eb5-bb11-1f5a1f86b91a", + "id": "a3f553f2-1105-4170-868b-6f353def01d5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -52792,9 +54600,9 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nvidia", - "context_length": 128000, - "created_at": "2025-10-28T18:19:25.723503+00:00", + "author": "qwen", + "context_length": 32000, + "created_at": "2025-10-28T19:43:42.126124+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -52802,50 +54610,45 @@ }, "default_stops": [], "default_system": null, - "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", + "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Other", - "has_text_output": true, - "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", + "has_text_output": false, + "hf_slug": "Qwen/Qwen3-Embedding-8B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Nemotron Nano 12B 2 VL", - "output_modalities": ["text"], - "permaslug": "nvidia/nemotron-nano-12b-v2-vl", + "name": "Qwen: Qwen3 Embedding 8B", + "output_modalities": ["embeddings"], + "permaslug": "qwen/qwen3-embedding-8b", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Nemotron Nano 12B 2 VL", - "slug": "nvidia/nemotron-nano-12b-v2-vl", - "updated_at": "2025-11-12T02:19:07.557675+00:00", + "short_name": "Qwen3 Embedding 8B", + "slug": "qwen/qwen3-embedding-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nvidia/nemotron-nano-12b-v2-vl", - "model_variant_slug": "nvidia/nemotron-nano-12b-v2-vl", + "model_variant_permaslug": "qwen/qwen3-embedding-8b", + "model_variant_slug": "qwen/qwen3-embedding-8b", "moderation_required": false, - "name": "DeepInfra | nvidia/nemotron-nano-12b-v2-vl", + "name": "DeepInfra | qwen/qwen3-embedding-8b", "pricing": { - "completion": "0.0000006", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -52930,24 +54733,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL", + "provider_model_id": "Qwen/Qwen3-Embedding-8B", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -52961,7 +54767,7 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" @@ -52969,45 +54775,45 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Other", - "has_text_output": true, - "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", + "has_text_output": false, + "hf_slug": "Qwen/Qwen3-Embedding-8B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Nemotron Nano 12B 2 VL", - "output_modalities": ["text"], - "permaslug": "nvidia/nemotron-nano-12b-v2-vl", + "name": "Qwen: Qwen3 Embedding 8B", + "output_modalities": ["embeddings"], + "permaslug": "qwen/qwen3-embedding-8b", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Nemotron Nano 12B 2 VL", - "slug": "nvidia/nemotron-nano-12b-v2-vl", - "updated_at": "2025-11-12T02:19:07.557675+00:00", + "short_name": "Qwen3 Embedding 8B", + "slug": "qwen/qwen3-embedding-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "nvidia", - "context_length": 131072, - "created_at": "2025-09-05T21:13:27.486887+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -53025,7 +54831,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "e1890582-9493-47ac-b31f-c9da070ff230", + "id": "74635ab6-7ba2-4cd7-8f2f-10651305385a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -53038,55 +54844,50 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nvidia", - "context_length": 32000, - "created_at": "2025-09-05T21:13:27.486887+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "features": { "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Nemotron Nano 9B V2", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "nvidia/nemotron-nano-9b-v2", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Nemotron Nano 9B V2", - "slug": "nvidia/nemotron-nano-9b-v2", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nvidia/nemotron-nano-9b-v2", - "model_variant_slug": "nvidia/nemotron-nano-9b-v2", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", "moderation_required": false, - "name": "DeepInfra | nvidia/nemotron-nano-9b-v2", + "name": "DeepInfra | qwen/qwen3-next-80b-a3b-instruct-2509", "pricing": { - "completion": "0.00000016", + "completion": "0.0000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000009" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -53171,24 +54972,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", + "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -53204,56 +55008,56 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Nemotron Nano 9B V2", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "nvidia/nemotron-nano-9b-v2", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Nemotron Nano 9B V2", - "slug": "nvidia/nemotron-nano-9b-v2", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-23T23:04:47+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -53262,6 +55066,10 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -53271,7 +55079,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "b58ea971-6c98-4488-a180-cfb19e626172", + "id": "a352cc82-2bdb-411b-9f99-bd7829655412", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -53284,60 +55092,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "created_at": "2025-09-23T23:04:47+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b:exacto", - "model_variant_slug": "openai/gpt-oss-120b:exacto", + "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", + "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", "moderation_required": false, - "name": "DeepInfra | openai/gpt-oss-120b:exacto", + "name": "DeepInfra | qwen/qwen3-vl-235b-a22b-instruct", "pricing": { - "completion": "0.00000019", + "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000039", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000011", + "prompt": "0.0000002" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -53422,24 +55225,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "openai/gpt-oss-120b", + "provider_model_id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp4", - "quantization": "fp4", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -53450,62 +55256,59 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], - "variant": "exacto" + "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b (exacto)", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b (exacto)", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-10-06T23:47:56.430294+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -53527,7 +55330,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "bfc33027-e2bd-4d16-9ee2-cee1913475f0", + "id": "9b9d1187-7d55-4ec6-87b0-734dd1432829", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -53540,62 +55343,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-10-06T23:47:56.430294+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "model_variant_slug": "qwen/qwen3-vl-30b-a3b-instruct", "moderation_required": false, - "name": "DeepInfra | openai/gpt-oss-20b", + "name": "DeepInfra | qwen/qwen3-vl-30b-a3b-instruct", "pricing": { - "completion": "0.00000014", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -53680,24 +55476,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "openai/gpt-oss-20b", + "provider_model_id": "Qwen/Qwen3-VL-30B-A3B-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp4", - "quantization": "fp4", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -53708,60 +55507,56 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 128000, - "created_at": "2025-03-24T18:10:38.542849+00:00", + "context_length": 32768, + "created_at": "2024-09-19T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", + "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -53770,10 +55565,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -53783,7 +55574,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "af2c4f7c-a833-4c69-958d-7996a5a0ff8a", + "id": "8b6b26e9-621a-4b31-b55a-c9aaa7482ede", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -53792,49 +55583,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 32768, - "created_at": "2025-03-24T18:10:38.542849+00:00", + "context_length": 131072, + "created_at": "2024-09-19T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", + "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "features": {}, "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-32B-Instruct", + "hf_slug": "Qwen/Qwen2.5-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 32B Instruct", + "name": "Qwen2.5 72B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-32b-instruct", + "permaslug": "qwen/qwen-2.5-72b-instruct", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 VL 32B Instruct", - "slug": "qwen/qwen2.5-vl-32b-instruct", + "short_name": "Qwen2.5 72B Instruct", + "slug": "qwen/qwen-2.5-72b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen2.5-vl-32b-instruct", - "model_variant_slug": "qwen/qwen2.5-vl-32b-instruct", + "model_variant_permaslug": "qwen/qwen-2.5-72b-instruct", + "model_variant_slug": "qwen/qwen-2.5-72b-instruct", "moderation_required": false, - "name": "DeepInfra | qwen/qwen2.5-vl-32b-instruct", + "name": "DeepInfra | qwen/qwen-2.5-72b-instruct", "pricing": { - "completion": "0.0000006", + "completion": "0.00000039", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000012" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -53919,21 +55705,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen2.5-VL-32B-Instruct", + "provider_model_id": "Qwen/Qwen2.5-72B-Instruct", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "deepinfra/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -53945,45 +55736,47 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-32B-Instruct", + "hf_slug": "Qwen/Qwen2.5-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 32B Instruct", + "name": "Qwen2.5 72B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-32b-instruct", + "permaslug": "qwen/qwen-2.5-72b-instruct", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 VL 32B Instruct", - "slug": "qwen/qwen2.5-vl-32b-instruct", + "short_name": "Qwen2.5 72B Instruct", + "slug": "qwen/qwen-2.5-72b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T21:41:18.320017+00:00", + "author": "sao10k", + "context_length": 8192, + "created_at": "2024-08-13T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, programming, and logical inference, and a \"non-thinking\" mode for general-purpose conversation. The model is fine-tuned for instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.", + "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 40960, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -53992,20 +55785,17 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "778bdebf-d018-4a24-a34e-230fce0f2045", + "id": "cc4c8dc5-8b3f-4d54-84e2-8381184ff841", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -54014,59 +55804,46 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 40960, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131702, - "created_at": "2025-04-28T21:41:18.320017+00:00", + "author": "sao10k", + "context_length": 8192, + "created_at": "2024-08-13T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, programming, and logical inference, and a \"non-thinking\" mode for general-purpose conversation. The model is fine-tuned for instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen3", + "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-14B", + "hf_slug": "Sao10K/L3-8B-Lunaris-v1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 14B", + "name": "Sao10K: Llama 3 8B Lunaris", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-14b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "sao10k/l3-lunaris-8b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 14B", - "slug": "qwen/qwen3-14b", + "short_name": "Llama 3 8B Lunaris", + "slug": "sao10k/l3-lunaris-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-14b-04-28", - "model_variant_slug": "qwen/qwen3-14b", + "model_variant_permaslug": "sao10k/l3-lunaris-8b", + "model_variant_slug": "sao10k/l3-lunaris-8b", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-14b-04-28", + "name": "DeepInfra | sao10k/l3-lunaris-8b", "pricing": { - "completion": "0.00000024", + "completion": "0.00000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "DeepInfra (Turbo)", "provider_info": { "adapterName": "DeepInfraAdapter", "baseUrl": "https://api.deepinfra.com/v1/openai", @@ -54078,7 +55855,7 @@ "termsOfServiceURL": "https://deepinfra.com/terms", "training": false }, - "displayName": "DeepInfra", + "displayName": "DeepInfra (Turbo)", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, @@ -54149,25 +55926,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra/turbo", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-14B", + "provider_model_id": "Sao10K/L3-8B-Lunaris-v1-Turbo", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", + "provider_slug": "deepinfra/turbo", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", - "response_format", "max_tokens", "temperature", "top_p", @@ -54178,54 +55957,45 @@ "top_k", "seed", "min_p", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-14B", + "hf_slug": "Sao10K/L3-8B-Lunaris-v1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 14B", + "name": "Sao10K: Llama 3 8B Lunaris", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-14b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "sao10k/l3-lunaris-8b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 14B", - "slug": "qwen/qwen3-14b", + "short_name": "Llama 3 8B Lunaris", + "slug": "sao10k/l3-lunaris-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T21:29:17.25671+00:00", + "author": "sao10k", + "context_length": 131072, + "created_at": "2024-08-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 40960, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -54234,17 +56004,16 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "5f108245-8fec-4702-859d-56c5b54a71d7", + "id": "7ab4ba43-98eb-4842-9046-f7f1822ab3a2", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -54253,57 +56022,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 40960, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "sao10k", "context_length": 131072, - "created_at": "2025-04-28T21:29:17.25671+00:00", + "created_at": "2024-08-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen3", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B", + "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "sao10k/l3.1-euryale-70b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B", - "slug": "qwen/qwen3-235b-a22b", + "short_name": "Llama 3.1 Euryale 70B v2.2", + "slug": "sao10k/l3.1-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-04-28", - "model_variant_slug": "qwen/qwen3-235b-a22b", + "model_variant_permaslug": "sao10k/l3.1-euryale-70b", + "model_variant_slug": "sao10k/l3.1-euryale-70b", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-235b-a22b-04-28", + "name": "DeepInfra | sao10k/l3.1-euryale-70b", "pricing": { - "completion": "0.00000054", + "completion": "0.00000085", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000018", - "request": "0", - "web_search": "0" + "prompt": "0.00000085" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -54388,24 +56144,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-235B-A22B", + "provider_model_id": "Sao10K/L3.1-70B-Euryale-v2.2", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -54416,55 +56175,45 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B", + "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "sao10k/l3.1-euryale-70b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B", - "slug": "qwen/qwen3-235b-a22b", + "short_name": "Llama 3.1 Euryale 70B v2.2", + "slug": "sao10k/l3.1-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", + "author": "sao10k", + "context_length": 131072, + "created_at": "2024-12-18T15:32:08.468786+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -54476,74 +56225,59 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "3e3e2284-a12c-46b5-b3db-bc4eb148c7d9", + "id": "6e3850b8-2305-4bda-990a-7aa06427bc83", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 250, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", + "author": "sao10k", + "context_length": 8192, + "created_at": "2024-12-18T15:32:08.468786+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Sao10K/L3.3-70B-Euryale-v2.3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Sao10K: Llama 3.3 Euryale 70B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "sao10k/l3.3-euryale-70b-v2.3", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Llama 3.3 Euryale 70B", + "slug": "sao10k/l3.3-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "sao10k/l3.3-euryale-70b-v2.3", + "model_variant_slug": "sao10k/l3.3-euryale-70b", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-235b-a22b-07-25", + "name": "DeepInfra | sao10k/l3.3-euryale-70b-v2.3", "pricing": { - "completion": "0.000000463", + "completion": "0.00000085", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000071", - "request": "0", - "web_search": "0" + "prompt": "0.00000085" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -54628,17 +56362,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "provider_model_id": "Sao10K/L3.3-70B-Euryale-v2.3", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", @@ -54654,61 +56393,49 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Sao10K/L3.3-70B-Euryale-v2.3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Sao10K: Llama 3.3 Euryale 70B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "sao10k/l3.3-euryale-70b-v2.3", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Llama 3.3 Euryale 70B", + "slug": "sao10k/l3.3-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-25T13:19:17.179049+00:00", + "author": "sentence-transformers", + "context_length": 512, + "created_at": "2025-11-18T02:15:55.450718+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "The all-MiniLM-L12-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, clustering, and similarity-scoring.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -54720,13 +56447,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "e2afc20a-e73b-4af1-b934-bdfe34fe6c77", + "id": "205fec37-272b-4f38-84d9-41b8a976c581", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -54739,60 +56466,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-25T13:19:17.179049+00:00", + "author": "sentence-transformers", + "context_length": 8192, + "created_at": "2025-11-18T02:15:55.450718+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "The all-MiniLM-L12-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, clustering, and similarity-scoring.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "group": "Other", + "has_text_output": false, + "hf_slug": "sentence-transformers/all-MiniLM-L12-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "name": "Sentence Transformers: all-MiniLM-L12-v2", + "output_modalities": ["embeddings"], + "permaslug": "sentence-transformers/all-minilm-l12-v2-20251117", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "all-MiniLM-L12-v2", + "slug": "sentence-transformers/all-minilm-l12-v2", + "updated_at": "2025-11-18T02:47:01.300061+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", - "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_permaslug": "sentence-transformers/all-minilm-l12-v2-20251117", + "model_variant_slug": "sentence-transformers/all-minilm-l12-v2", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-235b-a22b-thinking-2507", + "name": "DeepInfra | sentence-transformers/all-minilm-l12-v2-20251117", "pricing": { - "completion": "0.00000239", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000023", - "request": "0", - "web_search": "0" + "prompt": "0.000000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -54877,24 +56599,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "provider_model_id": "sentence-transformers/all-MiniLM-L12-v2", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -54908,7 +56633,7 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" @@ -54916,49 +56641,49 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "group": "Other", + "has_text_output": false, + "hf_slug": "sentence-transformers/all-MiniLM-L12-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "name": "Sentence Transformers: all-MiniLM-L12-v2", + "output_modalities": ["embeddings"], + "permaslug": "sentence-transformers/all-minilm-l12-v2-20251117", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "all-MiniLM-L12-v2", + "slug": "sentence-transformers/all-minilm-l12-v2", + "updated_at": "2025-11-18T02:47:01.300061+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "author": "sentence-transformers", + "context_length": 512, + "created_at": "2025-11-17T23:12:56.610141+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "The all-MiniLM-L6-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, enabling high-quality semantic representations that are ideal for downstream tasks such as information retrieval, clustering, similarity scoring, and text ranking.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 40960, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -54967,20 +56692,16 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "efdfff77-9574-4695-8e08-32d968a43376", + "id": "b77e4b8c-8fbf-4a51-984f-53b83a6a3284", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -54989,64 +56710,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 40960, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "author": "sentence-transformers", + "context_length": 8192, + "created_at": "2025-11-17T23:12:56.610141+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "The all-MiniLM-L6-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, enabling high-quality semantic representations that are ideal for downstream tasks such as information retrieval, clustering, similarity scoring, and text ranking.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "group": "Other", + "has_text_output": false, + "hf_slug": "sentence-transformers/all-MiniLM-L6-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "name": "Sentence Transformers: all-MiniLM-L6-v2", + "output_modalities": ["embeddings"], + "permaslug": "sentence-transformers/all-minilm-l6-v2-20251117", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "all-MiniLM-L6-v2", + "slug": "sentence-transformers/all-minilm-l6-v2", + "updated_at": "2025-11-18T00:09:22.500349+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", - "model_variant_slug": "qwen/qwen3-30b-a3b", + "model_variant_permaslug": "sentence-transformers/all-minilm-l6-v2-20251117", + "model_variant_slug": "sentence-transformers/all-minilm-l6-v2", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-30b-a3b-04-28", + "name": "DeepInfra | sentence-transformers/all-minilm-l6-v2-20251117", "pricing": { - "completion": "0.00000029", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "prompt": "0.000000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -55131,25 +56847,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-30B-A3B", + "provider_model_id": "sentence-transformers/all-MiniLM-L6-v2", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "response_format", "max_tokens", "temperature", "top_p", @@ -55160,57 +56878,60 @@ "top_k", "seed", "min_p", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "group": "Other", + "has_text_output": false, + "hf_slug": "sentence-transformers/all-MiniLM-L6-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "name": "Sentence Transformers: all-MiniLM-L6-v2", + "output_modalities": ["embeddings"], + "permaslug": "sentence-transformers/all-minilm-l6-v2-20251117", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "all-MiniLM-L6-v2", + "slug": "sentence-transformers/all-minilm-l6-v2", + "updated_at": "2025-11-18T00:09:22.500349+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "sentence-transformers", + "context_length": 512, + "created_at": "2025-11-17T23:23:50.085215+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "The all-mpnet-base-v2 embedding model encodes sentences and short paragraphs into a 768-dimensional dense vector space, providing high-fidelity semantic embeddings well suited for tasks like information retrieval, clustering, similarity scoring, and text ranking.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 40960, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -55219,17 +56940,16 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, - "type_function": false + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "6b8c829d-3094-45e7-8139-0a67e09060c3", + "id": "1ae66267-9caf-4e1e-8c06-23c46cf1899c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -55242,53 +56962,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "sentence-transformers", + "context_length": 8192, + "created_at": "2025-11-17T23:23:50.085215+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "The all-mpnet-base-v2 embedding model encodes sentences and short paragraphs into a 768-dimensional dense vector space, providing high-fidelity semantic embeddings well suited for tasks like information retrieval, clustering, similarity scoring, and text ranking.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "group": "Other", + "has_text_output": false, + "hf_slug": "sentence-transformers/all-mpnet-base-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "name": "Sentence Transformers: all-mpnet-base-v2", + "output_modalities": ["embeddings"], + "permaslug": "sentence-transformers/all-mpnet-base-v2-20251117", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "all-mpnet-base-v2", + "slug": "sentence-transformers/all-mpnet-base-v2", + "updated_at": "2025-11-18T00:09:06.924239+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-32b-04-28", - "model_variant_slug": "qwen/qwen3-32b", + "model_variant_permaslug": "sentence-transformers/all-mpnet-base-v2-20251117", + "model_variant_slug": "sentence-transformers/all-mpnet-base-v2", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-32b-04-28", + "name": "DeepInfra | sentence-transformers/all-mpnet-base-v2-20251117", "pricing": { - "completion": "0.00000028", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "prompt": "0.000000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -55373,24 +57095,27 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-32B", + "provider_model_id": "sentence-transformers/all-mpnet-base-v2", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -55401,55 +57126,60 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "group": "Other", + "has_text_output": false, + "hf_slug": "sentence-transformers/all-mpnet-base-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "name": "Sentence Transformers: all-mpnet-base-v2", + "output_modalities": ["embeddings"], + "permaslug": "sentence-transformers/all-mpnet-base-v2-20251117", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "all-mpnet-base-v2", + "slug": "sentence-transformers/all-mpnet-base-v2", + "updated_at": "2025-11-18T00:09:06.924239+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-23T00:29:06+00:00", - "default_parameters": {}, + "author": "sentence-transformers", + "context_length": 512, + "created_at": "2025-11-18T02:02:19.770104+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "The multi-qa-mpnet-base-dot-v1 embedding model transforms sentences and short paragraphs into a 768-dimensional dense vector space, generating high-quality semantic embeddings optimized for question-and-answer retrieval, semantic search, and similarity-scoring across diverse content.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -55467,7 +57197,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "6982ced0-1d3c-4fd0-bc36-21297218fdbe", + "id": "763c1e1e-10bf-43b8-82f5-a73847ef2d7f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -55480,57 +57210,57 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 1048576, - "created_at": "2025-07-23T00:29:06+00:00", - "default_parameters": {}, + "author": "sentence-transformers", + "context_length": 8192, + "created_at": "2025-11-18T02:02:19.770104+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "The multi-qa-mpnet-base-dot-v1 embedding model transforms sentences and short paragraphs into a 768-dimensional dense vector space, generating high-quality semantic embeddings optimized for question-and-answer retrieval, semantic search, and similarity-scoring across diverse content.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "group": "Other", + "has_text_output": false, + "hf_slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "name": "Sentence Transformers: multi-qa-mpnet-base-dot-v1", + "output_modalities": ["embeddings"], + "permaslug": "sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "multi-qa-mpnet-base-dot-v1", + "slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "updated_at": "2025-11-18T02:47:07.243977+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "model_variant_slug": "qwen/qwen3-coder", + "model_variant_permaslug": "sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117", + "model_variant_slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-coder-480b-a35b-07-25", + "name": "DeepInfra | sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117", "pricing": { - "completion": "0.0000012", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000028", - "request": "0", - "web_search": "0" + "prompt": "0.000000005" }, - "provider_display_name": "DeepInfra (Turbo)", + "provider_display_name": "DeepInfra", "provider_info": { "adapterName": "DeepInfraAdapter", "baseUrl": "https://api.deepinfra.com/v1/openai", @@ -55542,7 +57272,7 @@ "termsOfServiceURL": "https://deepinfra.com/terms", "training": false }, - "displayName": "DeepInfra (Turbo)", + "displayName": "DeepInfra", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, @@ -55613,21 +57343,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra/turbo", + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], + "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo", + "provider_model_id": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/turbo", - "quantization": "fp4", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -55639,49 +57374,48 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "group": "Other", + "has_text_output": false, + "hf_slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "name": "Sentence Transformers: multi-qa-mpnet-base-dot-v1", + "output_modalities": ["embeddings"], + "permaslug": "sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "multi-qa-mpnet-base-dot-v1", + "slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "updated_at": "2025-11-18T02:47:07.243977+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32768, - "created_at": "2025-10-28T14:48:42.679733+00:00", + "author": "sentence-transformers", + "context_length": 512, + "created_at": "2025-11-18T02:20:54.215887+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -55689,11 +57423,11 @@ }, "default_stops": [], "default_system": null, - "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", + "description": "The paraphrase-MiniLM-L6-v2 embedding model converts sentences and short paragraphs into a 384-dimensional dense vector space, producing high-quality semantic embeddings optimized for paraphrase detection, semantic similarity scoring, clustering, and lightweight retrieval tasks.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -55702,7 +57436,6 @@ "training": false }, "features": { - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -55712,7 +57445,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0c401430-de40-460f-917e-ee8a3ce1f65f", + "id": "89d827bb-abe3-4bec-8956-2233d1bf5a2b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -55725,9 +57458,9 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 32000, - "created_at": "2025-10-28T14:48:42.679733+00:00", + "author": "sentence-transformers", + "context_length": 8192, + "created_at": "2025-11-18T02:20:54.215887+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -55735,7 +57468,7 @@ }, "default_stops": [], "default_system": null, - "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", + "description": "The paraphrase-MiniLM-L6-v2 embedding model converts sentences and short paragraphs into a 384-dimensional dense vector space, producing high-quality semantic embeddings optimized for paraphrase detection, semantic similarity scoring, clustering, and lightweight retrieval tasks.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -55746,39 +57479,34 @@ }, "group": "Other", "has_text_output": false, - "hf_slug": "Qwen/Qwen3-Embedding-4B", + "hf_slug": "sentence-transformers/paraphrase-MiniLM-L6-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Embedding 4B", + "name": "Sentence Transformers: paraphrase-MiniLM-L6-v2", "output_modalities": ["embeddings"], - "permaslug": "qwen/qwen3-embedding-4b", + "permaslug": "sentence-transformers/paraphrase-minilm-l6-v2-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Embedding 4B", - "slug": "qwen/qwen3-embedding-4b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "paraphrase-MiniLM-L6-v2", + "slug": "sentence-transformers/paraphrase-minilm-l6-v2", + "updated_at": "2025-11-18T02:46:57.195578+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-embedding-4b", - "model_variant_slug": "qwen/qwen3-embedding-4b", + "model_variant_permaslug": "sentence-transformers/paraphrase-minilm-l6-v2-20251117", + "model_variant_slug": "sentence-transformers/paraphrase-minilm-l6-v2", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-embedding-4b", + "name": "DeepInfra | sentence-transformers/paraphrase-minilm-l6-v2-20251117", "pricing": { "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "prompt": "0.000000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -55863,17 +57591,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-Embedding-4B", + "provider_model_id": "sentence-transformers/paraphrase-MiniLM-L6-v2", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra", @@ -55907,30 +57640,30 @@ }, "group": "Other", "has_text_output": false, - "hf_slug": "Qwen/Qwen3-Embedding-4B", + "hf_slug": "sentence-transformers/paraphrase-MiniLM-L6-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Embedding 4B", + "name": "Sentence Transformers: paraphrase-MiniLM-L6-v2", "output_modalities": ["embeddings"], - "permaslug": "qwen/qwen3-embedding-4b", + "permaslug": "sentence-transformers/paraphrase-minilm-l6-v2-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Embedding 4B", - "slug": "qwen/qwen3-embedding-4b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "paraphrase-MiniLM-L6-v2", + "slug": "sentence-transformers/paraphrase-minilm-l6-v2", + "updated_at": "2025-11-18T02:46:57.195578+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32768, - "created_at": "2025-10-28T19:43:42.126124+00:00", + "author": "thenlper", + "context_length": 512, + "created_at": "2025-11-18T02:43:40.915161+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -55938,11 +57671,11 @@ }, "default_stops": [], "default_system": null, - "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", + "description": "The gte-base embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, delivering efficient and effective semantic embeddings optimized for textual similarity, semantic search, and clustering applications.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -55951,7 +57684,6 @@ "training": false }, "features": { - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -55961,7 +57693,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "a3f553f2-1105-4170-868b-6f353def01d5", + "id": "4b1b4c07-7ad9-416c-b731-381e0c528c74", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -55974,9 +57706,9 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 32000, - "created_at": "2025-10-28T19:43:42.126124+00:00", + "author": "thenlper", + "context_length": 8192, + "created_at": "2025-11-18T02:43:40.915161+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -55984,7 +57716,7 @@ }, "default_stops": [], "default_system": null, - "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", + "description": "The gte-base embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, delivering efficient and effective semantic embeddings optimized for textual similarity, semantic search, and clustering applications.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -55995,39 +57727,34 @@ }, "group": "Other", "has_text_output": false, - "hf_slug": "Qwen/Qwen3-Embedding-8B", + "hf_slug": "thenlper/gte-base", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Embedding 8B", + "name": "Thenlper: GTE-Base", "output_modalities": ["embeddings"], - "permaslug": "qwen/qwen3-embedding-8b", + "permaslug": "thenlper/gte-base-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Embedding 8B", - "slug": "qwen/qwen3-embedding-8b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GTE-Base", + "slug": "thenlper/gte-base", + "updated_at": "2025-11-18T16:27:19.105717+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-embedding-8b", - "model_variant_slug": "qwen/qwen3-embedding-8b", + "model_variant_permaslug": "thenlper/gte-base-20251117", + "model_variant_slug": "thenlper/gte-base", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-embedding-8b", + "name": "DeepInfra | thenlper/gte-base-20251117", "pricing": { "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001", - "request": "0", - "web_search": "0" + "prompt": "0.000000005" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -56112,17 +57839,22 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-Embedding-8B", + "provider_model_id": "thenlper/gte-base", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra", @@ -56156,38 +57888,42 @@ }, "group": "Other", "has_text_output": false, - "hf_slug": "Qwen/Qwen3-Embedding-8B", + "hf_slug": "thenlper/gte-base", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Embedding 8B", + "name": "Thenlper: GTE-Base", "output_modalities": ["embeddings"], - "permaslug": "qwen/qwen3-embedding-8b", + "permaslug": "thenlper/gte-base-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Embedding 8B", - "slug": "qwen/qwen3-embedding-8b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GTE-Base", + "slug": "thenlper/gte-base", + "updated_at": "2025-11-18T16:27:19.105717+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", - "default_parameters": {}, + "author": "thenlper", + "context_length": 512, + "created_at": "2025-11-18T02:40:55.377007+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "The gte-large embedding model converts English sentences, paragraphs and moderate-length documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for information retrieval, semantic textual similarity, reranking and clustering tasks. Trained via multi-stage contrastive learning on a large domain-diverse relevance corpus, it offers excellent performance across general-purpose embedding use-cases.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 512, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -56205,7 +57941,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "74635ab6-7ba2-4cd7-8f2f-10651305385a", + "id": "31f228a6-f183-4617-950d-7c4a14d74acd", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -56218,55 +57954,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", - "default_parameters": {}, + "author": "thenlper", + "context_length": 8192, + "created_at": "2025-11-18T02:40:55.377007+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "The gte-large embedding model converts English sentences, paragraphs and moderate-length documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for information retrieval, semantic textual similarity, reranking and clustering tasks. Trained via multi-stage contrastive learning on a large domain-diverse relevance corpus, it offers excellent performance across general-purpose embedding use-cases.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "group": "Other", + "has_text_output": false, + "hf_slug": "thenlper/gte-large", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "name": "Thenlper: GTE-Large", + "output_modalities": ["embeddings"], + "permaslug": "thenlper/gte-large-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GTE-Large", + "slug": "thenlper/gte-large", + "updated_at": "2025-11-18T16:28:48.144005+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", + "model_variant_permaslug": "thenlper/gte-large-20251117", + "model_variant_slug": "thenlper/gte-large", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-next-80b-a3b-instruct-2509", + "name": "DeepInfra | thenlper/gte-large-20251117", "pricing": { - "completion": "0.0000011", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000001" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -56351,21 +58087,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "provider_model_id": "thenlper/gte-large", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -56377,61 +58118,56 @@ "top_k", "seed", "min_p", - "response_format", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "group": "Other", + "has_text_output": false, + "hf_slug": "thenlper/gte-large", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "name": "Thenlper: GTE-Large", + "output_modalities": ["embeddings"], + "permaslug": "thenlper/gte-large-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GTE-Large", + "slug": "thenlper/gte-large", + "updated_at": "2025-11-18T16:28:48.144005+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-23T23:04:47+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, - "default_stops": [], + "author": "microsoft", + "context_length": 65536, + "created_at": "2024-04-16T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["USER:", ""], "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 65536, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -56440,10 +58176,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -56453,7 +58185,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "a352cc82-2bdb-411b-9f99-bd7829655412", + "id": "03ac4ad1-a230-4ce7-821c-e797305733df", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -56462,63 +58194,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:47+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, - "default_stops": [], + "author": "microsoft", + "context_length": 65536, + "created_at": "2024-04-16T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["USER:", ""], "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "hf_slug": "microsoft/WizardLM-2-8x22B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "vicuna", "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "name": "WizardLM-2 8x22B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "microsoft/wizardlm-2-8x22b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "short_name": "WizardLM-2 8x22B", + "slug": "microsoft/wizardlm-2-8x22b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", + "model_variant_permaslug": "microsoft/wizardlm-2-8x22b", + "model_variant_slug": "microsoft/wizardlm-2-8x22b", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-vl-235b-a22b-instruct", + "name": "DeepInfra | microsoft/wizardlm-2-8x22b", "pricing": { - "completion": "0.0000012", + "completion": "0.00000048", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000048" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -56603,21 +58316,26 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "provider_model_id": "microsoft/WizardLM-2-8x22B", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", @@ -56637,51 +58355,41 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "hf_slug": "microsoft/WizardLM-2-8x22B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "vicuna", "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "name": "WizardLM-2 8x22B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "microsoft/wizardlm-2-8x22b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "short_name": "WizardLM-2 8x22B", + "slug": "microsoft/wizardlm-2-8x22b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-06T23:47:56.430294+00:00", + "author": "z-ai", + "context_length": 202752, + "created_at": "2025-09-30T12:32:56.306946+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": 0.6, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 202752, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -56690,6 +58398,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supported_parameters": { "response_format": false, "structured_outputs": false @@ -56703,7 +58412,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9b9d1187-7d55-4ec6-87b0-734dd1432829", + "id": "c8607edc-8577-4199-b03d-1213da4743dc", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -56716,60 +58425,56 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-06T23:47:56.430294+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-09-30T12:32:56.306946+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": 0.6, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 30B A3B Instruct", - "slug": "qwen/qwen3-vl-30b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-instruct", - "model_variant_slug": "qwen/qwen3-vl-30b-a3b-instruct", + "model_variant_permaslug": "z-ai/glm-4.6", + "model_variant_slug": "z-ai/glm-4.6", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-vl-30b-a3b-instruct", + "name": "DeepInfra | z-ai/glm-4.6", "pricing": { - "completion": "0.0000006", + "completion": "0.00000174", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000000799999993", + "prompt": "0.00000043" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -56854,22 +58559,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "provider_model_id": "zai-org/GLM-4.6", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/fp4", + "quantization": "fp4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -56880,60 +58592,62 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 30B A3B Instruct", - "slug": "qwen/qwen3-vl-30b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-14T17:35:08.402158+00:00", + "author": "z-ai", + "context_length": 131072, + "created_at": "2025-12-08T15:24:22.464154+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": 0.8, + "top_p": 0.6 }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", + "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -56942,20 +58656,19 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, + "reasoning_return_mechanism": "reasoning-content", + "supports_base64_video_input": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": false }, "has_chat_completions": true, "has_completions": true, - "id": "d7d0bf69-5553-4ac3-a864-9e54dc6b5e93", + "id": "118b9f16-32ae-4579-b723-815eb609bde5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -56968,59 +58681,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 256000, - "created_at": "2025-10-14T17:35:08.402158+00:00", + "author": "z-ai", + "context_length": 131072, + "created_at": "2025-12-08T15:24:22.464154+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": 0.8, + "top_p": 0.6 }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", + "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", + "hf_slug": "zai-org/GLM-4.6V", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 8B Instruct", + "name": "Z.AI: GLM 4.6V", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-8b-instruct", + "permaslug": "z-ai/glm-4.6-20251208", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 8B Instruct", - "slug": "qwen/qwen3-vl-8b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6V", + "slug": "z-ai/glm-4.6v", + "updated_at": "2025-12-08T15:45:24.970322+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-8b-instruct", - "model_variant_slug": "qwen/qwen3-vl-8b-instruct", + "model_variant_permaslug": "z-ai/glm-4.6-20251208", + "model_variant_slug": "z-ai/glm-4.6v", "moderation_required": false, - "name": "DeepInfra | qwen/qwen3-vl-8b-instruct", + "name": "DeepInfra | z-ai/glm-4.6-20251208", "pricing": { - "completion": "0.00000069", + "completion": "0.0000009", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000018", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -57105,22 +58814,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen3-VL-8B-Instruct", + "provider_model_id": "zai-org/GLM-4.6V", "provider_name": "DeepInfra", "provider_region": null, "provider_slug": "deepinfra/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -57131,55 +58847,63 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tools", + "tool_choice", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", + "hf_slug": "zai-org/GLM-4.6V", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 8B Instruct", + "name": "Z.AI: GLM 4.6V", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-8b-instruct", + "permaslug": "z-ai/glm-4.6-20251208", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 8B Instruct", - "slug": "qwen/qwen3-vl-8b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6V", + "slug": "z-ai/glm-4.6v", + "updated_at": "2025-12-08T15:45:24.970322+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32768, - "created_at": "2024-09-19T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "z-ai", + "context_length": 202752, + "created_at": "2025-12-22T04:33:34.884504+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 202752, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -57188,16 +58912,17 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": false, "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "8b6b26e9-621a-4b31-b55a-c9aaa7482ede", + "id": "ab79ffa8-8dcc-41c4-9a9f-6cc408135243", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -57206,49 +58931,60 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2024-09-19T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", - "features": {}, - "group": "Qwen", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-72B-Instruct", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen2.5 72B Instruct", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-72b-instruct", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-20251222", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Qwen2.5 72B Instruct", - "slug": "qwen/qwen-2.5-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2.5-72b-instruct", - "model_variant_slug": "qwen/qwen-2.5-72b-instruct", + "model_variant_permaslug": "z-ai/glm-4.7-20251222", + "model_variant_slug": "z-ai/glm-4.7", "moderation_required": false, - "name": "DeepInfra | qwen/qwen-2.5-72b-instruct", + "name": "DeepInfra | z-ai/glm-4.7-20251222", "pricing": { - "completion": "0.00000039", + "completion": "0.00000175", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000012", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000008", + "prompt": "0.0000004" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -57333,22 +59069,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen2.5-72B-Instruct", + "provider_model_id": "zai-org/GLM-4.7", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/fp4", + "quantization": "fp4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -57361,45 +59104,61 @@ "min_p", "response_format", "tools", - "tool_choice" + "tool_choice", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Qwen", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-72B-Instruct", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen2.5 72B Instruct", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-72b-instruct", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-20251222", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Qwen2.5 72B Instruct", - "slug": "qwen/qwen-2.5-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32768, - "created_at": "2024-11-11T23:40:00.276653+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "z-ai", + "context_length": 202752, + "created_at": "2026-01-19T14:45:13.352372+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significantly improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", + "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", "endpoint": { "adapter_name": "DeepInfraAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 202752, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://deepinfra.com/privacy", @@ -57408,16 +59167,17 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "48e34779-1643-4717-8c51-9a9e02f4b993", + "id": "6efb5994-8fcb-4300-9c64-ba393193f5f6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -57426,49 +59186,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 128000, - "created_at": "2024-11-11T23:40:00.276653+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "z-ai", + "context_length": 200000, + "created_at": "2026-01-19T14:45:13.352372+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significantly improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", - "features": {}, - "group": "Qwen", + "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-Coder-32B-Instruct", + "hf_slug": "zai-org/GLM-4.7-Flash", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen2.5 Coder 32B Instruct", + "name": "Z.AI: GLM 4.7 Flash", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-coder-32b-instruct", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-flash-20260119", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5 Coder 32B Instruct", - "slug": "qwen/qwen-2.5-coder-32b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7 Flash", + "slug": "z-ai/glm-4.7-flash", + "updated_at": "2026-01-19T15:38:17.116015+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2.5-coder-32b-instruct", - "model_variant_slug": "qwen/qwen-2.5-coder-32b-instruct", + "model_variant_permaslug": "z-ai/glm-4.7-flash-20260119", + "model_variant_slug": "z-ai/glm-4.7-flash", "moderation_required": false, - "name": "DeepInfra | qwen/qwen-2.5-coder-32b-instruct", + "name": "DeepInfra | z-ai/glm-4.7-flash-20260119", "pricing": { - "completion": "0.00000015", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000000100000002", + "prompt": "0.00000006" }, "provider_display_name": "DeepInfra", "provider_info": { @@ -57553,22 +59316,29 @@ "BAAI/bge-m3-multi", "google/embeddinggemma-300m", "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo", + "microsoft/Phi-4-multimodal-instruct", + "deepseek-ai/DeepSeek-V3.2-Exp", + "meta-llama/Llama-3.2-90B-Vision-Instruct", + "meta-llama/Meta-Llama-3-70B-Instruct", + "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "DeepInfra", - "owners": ["{}"], + "owners": ["{}", "org_38GSfr7NVq111kyg6iXh1TTpKuz"], "slug": "deepinfra", "statusPageUrl": "https://status.deepinfra.com/" }, - "provider_model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", + "provider_model_id": "zai-org/GLM-4.7-Flash", "provider_name": "DeepInfra", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "deepinfra/bf16", + "quantization": "bf16", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -57579,54 +59349,82 @@ "top_k", "seed", "min_p", - "response_format" + "response_format", + "tool_choice", + "tools", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Qwen", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-Coder-32B-Instruct", + "hf_slug": "zai-org/GLM-4.7-Flash", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen2.5 Coder 32B Instruct", + "name": "Z.AI: GLM 4.7 Flash", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-coder-32b-instruct", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-flash-20260119", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5 Coder 32B Instruct", - "slug": "qwen/qwen-2.5-coder-32b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7 Flash", + "slug": "z-ai/glm-4.7-flash", + "updated_at": "2026-01-19T15:38:17.116015+00:00", "warning_message": null - }, + } + ], + "name": "DeepInfra", + "slug": "deepinfra" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": true + }, + "displayName": "DeepSeek", + "headquarters": "CN", + "icon": { + "url": "/images/icons/DeepSeek.png" + }, + "models": [ { - "author": "sao10k", - "context_length": 8192, - "created_at": "2024-08-13T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-12-01T13:10:42.818885+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "DeepSeekAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", - "training": false + "privacyPolicyURL": "https://chat.deepseek.com/downloads/DeepSeek%20Privacy%20Policy.html", + "retainsPrompts": true, + "termsOfServiceURL": "https://chat.deepseek.com/downloads/DeepSeek%20Terms%20of%20Use.html", + "training": true }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", + "supports_implicit_caching": true, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -57636,7 +59434,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cc4c8dc5-8b3f-4d54-84e2-8381184ff841", + "id": "684ad760-951b-4f00-a25f-8a5c85e979d6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -57645,206 +59443,183 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 64000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sao10k", - "context_length": 8192, - "created_at": "2024-08-13T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-12-01T13:10:42.818885+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", - "features": {}, - "group": "Llama3", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Sao10K/L3-8B-Lunaris-v1", + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3 8B Lunaris", + "name": "DeepSeek: DeepSeek V3.2", "output_modalities": ["text"], - "permaslug": "sao10k/l3-lunaris-8b", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-v3.2-20251201", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3 8B Lunaris", - "slug": "sao10k/l3-lunaris-8b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null }, - "model_variant_permaslug": "sao10k/l3-lunaris-8b", - "model_variant_slug": "sao10k/l3-lunaris-8b", + "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", + "model_variant_slug": "deepseek/deepseek-v3.2", "moderation_required": false, - "name": "DeepInfra | sao10k/l3-lunaris-8b", + "name": "DeepSeek | deepseek/deepseek-v3.2-20251201", "pricing": { - "completion": "0.00000005", + "completion": "0.00000042", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000028", + "prompt": "0.00000028" }, - "provider_display_name": "DeepInfra (Turbo)", + "provider_display_name": "DeepSeek", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "DeepSeekAdapter", + "baseUrl": "https://api.deepseek.com/beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", - "training": false + "privacyPolicyURL": "https://chat.deepseek.com/downloads/DeepSeek%20Privacy%20Policy.html", + "retainsPrompts": true, + "termsOfServiceURL": "https://chat.deepseek.com/downloads/DeepSeek%20Terms%20of%20Use.html", + "training": true }, - "displayName": "DeepInfra (Turbo)", + "displayName": "DeepSeek", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", + "headquarters": "CN", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/DeepSeek.png" }, - "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" - ], + "ignoredProviderModels": ["deepseek-reasoner"], "isAbortable": true, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "DeepInfra", + "name": "DeepSeek", "owners": ["{}"], - "slug": "deepinfra/turbo", - "statusPageUrl": "https://status.deepinfra.com/" + "slug": "deepseek", + "statusPageUrl": "https://status.deepseek.com/" }, - "provider_model_id": "Sao10K/L3-8B-Lunaris-v1-Turbo", - "provider_name": "DeepInfra", + "provider_model_id": "deepseek-chat", + "provider_name": "DeepSeek", "provider_region": null, - "provider_slug": "deepinfra/turbo", - "quantization": "fp8", + "provider_slug": "deepseek", + "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "top_k", - "seed", - "min_p", + "logprobs", + "top_logprobs", + "tools", + "tool_choice", "response_format" ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Sao10K/L3-8B-Lunaris-v1", + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3 8B Lunaris", + "name": "DeepSeek: DeepSeek V3.2", "output_modalities": ["text"], - "permaslug": "sao10k/l3-lunaris-8b", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-v3.2-20251201", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3 8B Lunaris", - "slug": "sao10k/l3-lunaris-8b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null - }, + } + ], + "name": "DeepSeek", + "slug": "deepseek" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Featherless", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://featherless.ai/&size=256" + }, + "models": [ { - "author": "sao10k", - "context_length": 131072, - "created_at": "2024-08-28T00:00:00+00:00", + "author": "alfredpros", + "context_length": 4096, + "created_at": "2025-04-14T14:44:34.216191+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["###", ""], "default_system": null, - "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", + "description": "A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.", "endpoint": { - "adapter_name": "DeepInfraAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "OpenAIAdapter", + "can_abort": false, + "context_length": 4096, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://featherless.ai/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://featherless.ai/terms", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -57854,7 +59629,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "7ab4ba43-98eb-4842-9046-f7f1822ab3a2", + "id": "7b1e72cd-c414-4ed4-b277-2edaddbf97a0", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -57863,148 +59638,84 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sao10k", - "context_length": 131072, - "created_at": "2024-08-28T00:00:00+00:00", + "author": "alfredpros", + "context_length": 4096, + "created_at": "2025-04-14T14:44:34.216191+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["###", ""], "default_system": null, - "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", + "description": "A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.", "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", + "hf_slug": "AlfredPros/CodeLlama-7b-Instruct-Solidity", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", "output_modalities": ["text"], - "permaslug": "sao10k/l3.1-euryale-70b", + "permaslug": "alfredpros/codellama-7b-instruct-solidity", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 Euryale 70B v2.2", - "slug": "sao10k/l3.1-euryale-70b", + "short_name": "CodeLLaMa 7B Instruct Solidity", + "slug": "alfredpros/codellama-7b-instruct-solidity", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "sao10k/l3.1-euryale-70b", - "model_variant_slug": "sao10k/l3.1-euryale-70b", + "model_variant_permaslug": "alfredpros/codellama-7b-instruct-solidity", + "model_variant_slug": "alfredpros/codellama-7b-instruct-solidity", "moderation_required": false, - "name": "DeepInfra | sao10k/l3.1-euryale-70b", + "name": "Featherless | alfredpros/codellama-7b-instruct-solidity", "pricing": { - "completion": "0.00000085", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000085", - "request": "0", - "web_search": "0" + "prompt": "0.0000008" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Featherless", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.featherless.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://featherless.ai/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://featherless.ai/terms", "training": false }, - "displayName": "DeepInfra", + "displayName": "Featherless", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://featherless.ai/&size=256" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "featherless-ai/Qwerky-72B", + "AlfredPros/CodeLlama-7b-Instruct-Solidity", + "EleutherAI/llemma_7b", + "featherless-ai/QRWKV-72B" ], - "isAbortable": true, - "isMultipartSupported": true, + "isAbortable": false, + "isMultipartSupported": false, "moderationRequired": false, - "name": "DeepInfra", + "name": "Featherless", "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "slug": "featherless", + "statusPageUrl": "https://featherless.ai/status" }, - "provider_model_id": "Sao10K/L3.1-70B-Euryale-v2.2", - "provider_name": "DeepInfra", + "provider_model_id": "AlfredPros/CodeLlama-7b-Instruct-Solidity", + "provider_name": "Featherless", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "featherless", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -58014,55 +59725,55 @@ "presence_penalty", "repetition_penalty", "top_k", - "seed", "min_p", - "response_format" + "seed" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", + "hf_slug": "AlfredPros/CodeLlama-7b-Instruct-Solidity", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", "output_modalities": ["text"], - "permaslug": "sao10k/l3.1-euryale-70b", + "permaslug": "alfredpros/codellama-7b-instruct-solidity", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 Euryale 70B v2.2", - "slug": "sao10k/l3.1-euryale-70b", + "short_name": "CodeLLaMa 7B Instruct Solidity", + "slug": "alfredpros/codellama-7b-instruct-solidity", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "sao10k", - "context_length": 131072, - "created_at": "2024-12-18T15:32:08.468786+00:00", + "author": "eleutherai", + "context_length": 4096, + "created_at": "2025-04-14T15:07:05.530993+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["Source: assistant", ""], "default_system": null, - "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", + "description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens. Llemma models are particularly strong at chain-of-thought mathematical reasoning and using computational tools for mathematics, such as Python and formal theorem provers.", "endpoint": { - "adapter_name": "DeepInfraAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "OpenAIAdapter", + "can_abort": false, + "context_length": 4096, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://featherless.ai/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://featherless.ai/terms", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -58072,7 +59783,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "6e3850b8-2305-4bda-990a-7aa06427bc83", + "id": "eb965d1b-6176-49cd-a4ea-c8bae8bb8f1d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -58081,148 +59792,84 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sao10k", - "context_length": 8192, - "created_at": "2024-12-18T15:32:08.468786+00:00", + "author": "eleutherai", + "context_length": 4096, + "created_at": "2025-04-14T15:07:05.530993+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["Source: assistant", ""], "default_system": null, - "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", + "description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens. Llemma models are particularly strong at chain-of-thought mathematical reasoning and using computational tools for mathematics, such as Python and formal theorem provers.", "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "Sao10K/L3.3-70B-Euryale-v2.3", + "hf_slug": "EleutherAI/llemma_7b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "code-llama", "model_version_group_id": null, - "name": "Sao10K: Llama 3.3 Euryale 70B", + "name": "EleutherAI: Llemma 7b", "output_modalities": ["text"], - "permaslug": "sao10k/l3.3-euryale-70b-v2.3", + "permaslug": "eleutherai/llemma_7b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.3 Euryale 70B", - "slug": "sao10k/l3.3-euryale-70b", + "short_name": "Llemma 7b", + "slug": "eleutherai/llemma_7b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "sao10k/l3.3-euryale-70b-v2.3", - "model_variant_slug": "sao10k/l3.3-euryale-70b", + "model_variant_permaslug": "eleutherai/llemma_7b", + "model_variant_slug": "eleutherai/llemma_7b", "moderation_required": false, - "name": "DeepInfra | sao10k/l3.3-euryale-70b-v2.3", + "name": "Featherless | eleutherai/llemma_7b", "pricing": { - "completion": "0.00000085", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000085", - "request": "0", - "web_search": "0" + "prompt": "0.0000008" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Featherless", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.featherless.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://featherless.ai/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://featherless.ai/terms", "training": false }, - "displayName": "DeepInfra", + "displayName": "Featherless", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://featherless.ai/&size=256" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "featherless-ai/Qwerky-72B", + "AlfredPros/CodeLlama-7b-Instruct-Solidity", + "EleutherAI/llemma_7b", + "featherless-ai/QRWKV-72B" ], - "isAbortable": true, - "isMultipartSupported": true, + "isAbortable": false, + "isMultipartSupported": false, "moderationRequired": false, - "name": "DeepInfra", + "name": "Featherless", "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "slug": "featherless", + "statusPageUrl": "https://featherless.ai/status" }, - "provider_model_id": "Sao10K/L3.3-70B-Euryale-v2.3", - "provider_name": "DeepInfra", + "provider_model_id": "EleutherAI/llemma_7b", + "provider_name": "Featherless", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "featherless", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -58232,59 +59879,74 @@ "presence_penalty", "repetition_penalty", "top_k", - "seed", "min_p", - "response_format" + "seed" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "Sao10K/L3.3-70B-Euryale-v2.3", + "hf_slug": "EleutherAI/llemma_7b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "code-llama", "model_version_group_id": null, - "name": "Sao10K: Llama 3.3 Euryale 70B", + "name": "EleutherAI: Llemma 7b", "output_modalities": ["text"], - "permaslug": "sao10k/l3.3-euryale-70b-v2.3", + "permaslug": "eleutherai/llemma_7b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.3 Euryale 70B", - "slug": "sao10k/l3.3-euryale-70b", + "short_name": "Llemma 7b", + "slug": "eleutherai/llemma_7b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, - { - "author": "sentence-transformers", - "context_length": 512, - "created_at": "2025-11-18T02:15:55.450718+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + } + ], + "name": "Featherless", + "slug": "featherless" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Fireworks", + "headquarters": "US", + "icon": { + "url": "/images/icons/Fireworks.png" + }, + "models": [ + { + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The all-MiniLM-L12-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, clustering, and similarity-scoring.", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 512, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -58294,7 +59956,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "205fec37-272b-4f38-84d9-41b8a976c581", + "id": "cbe2855c-ea77-474f-9190-11d51fc4921e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -58307,232 +59969,156 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sentence-transformers", - "context_length": 8192, - "created_at": "2025-11-18T02:15:55.450718+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The all-MiniLM-L12-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, producing efficient and high-quality semantic embeddings optimized for tasks such as semantic search, clustering, and similarity-scoring.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/all-MiniLM-L12-v2", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "features": {}, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Sentence Transformers: all-MiniLM-L12-v2", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/all-minilm-l12-v2-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "all-MiniLM-L12-v2", - "slug": "sentence-transformers/all-minilm-l12-v2", - "updated_at": "2025-11-18T02:47:01.300061+00:00", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "sentence-transformers/all-minilm-l12-v2-20251117", - "model_variant_slug": "sentence-transformers/all-minilm-l12-v2", + "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", + "model_variant_slug": "deepseek/deepseek-chat-v3-0324", "moderation_required": false, - "name": "DeepInfra | sentence-transformers/all-minilm-l12-v2-20251117", + "name": "Fireworks | deepseek/deepseek-chat-v3-0324", "pricing": { - "completion": "0", + "completion": "0.0000009000000000000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000045000000000000003", + "prompt": "0.0000009000000000000001" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "sentence-transformers/all-MiniLM-L12-v2", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/deepseek-v3-0324", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra", + "provider_slug": "fireworks", "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format" + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/all-MiniLM-L12-v2", + "features": {}, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Sentence Transformers: all-MiniLM-L12-v2", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/all-minilm-l12-v2-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "all-MiniLM-L12-v2", - "slug": "sentence-transformers/all-minilm-l12-v2", - "updated_at": "2025-11-18T02:47:01.300061+00:00", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "sentence-transformers", - "context_length": 512, - "created_at": "2025-11-17T23:12:56.610141+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-08-21T12:33:48+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "The all-MiniLM-L6-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, enabling high-quality semantic representations that are ideal for downstream tasks such as information retrieval, clustering, similarity scoring, and text ranking.", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 512, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { + "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -58542,7 +60128,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "b77e4b8c-8fbf-4a51-984f-53b83a6a3284", + "id": "0c00f0d2-8aad-4700-bd36-11cb9a717fde", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -58555,229 +60141,173 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sentence-transformers", - "context_length": 8192, - "created_at": "2025-11-17T23:12:56.610141+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-08-21T12:33:48+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "The all-MiniLM-L6-v2 embedding model maps sentences and short paragraphs into a 384-dimensional dense vector space, enabling high-quality semantic representations that are ideal for downstream tasks such as information retrieval, clustering, similarity scoring, and text ranking.", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/all-MiniLM-L6-v2", + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "Sentence Transformers: all-MiniLM-L6-v2", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/all-minilm-l6-v2-20251117", + "name": "DeepSeek: DeepSeek V3.1", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3.1", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "all-MiniLM-L6-v2", - "slug": "sentence-transformers/all-minilm-l6-v2", - "updated_at": "2025-11-18T00:09:22.500349+00:00", + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "sentence-transformers/all-minilm-l6-v2-20251117", - "model_variant_slug": "sentence-transformers/all-minilm-l6-v2", + "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", + "model_variant_slug": "deepseek/deepseek-chat-v3.1", "moderation_required": false, - "name": "DeepInfra | sentence-transformers/all-minilm-l6-v2-20251117", + "name": "Fireworks | deepseek/deepseek-chat-v3.1", "pricing": { - "completion": "0", + "completion": "0.00000168", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000028", + "prompt": "0.00000056" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "sentence-transformers/all-MiniLM-L6-v2", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/deepseek-v3p1", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra", + "provider_slug": "fireworks", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format" + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/all-MiniLM-L6-v2", + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "Sentence Transformers: all-MiniLM-L6-v2", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/all-minilm-l6-v2-20251117", + "name": "DeepSeek: DeepSeek V3.1", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3.1", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "all-MiniLM-L6-v2", - "slug": "sentence-transformers/all-minilm-l6-v2", - "updated_at": "2025-11-18T00:09:22.500349+00:00", + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "sentence-transformers", - "context_length": 512, - "created_at": "2025-11-17T23:23:50.085215+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "The all-mpnet-base-v2 embedding model encodes sentences and short paragraphs into a 768-dimensional dense vector space, providing high-fidelity semantic embeddings well suited for tasks like information retrieval, clustering, similarity scoring, and text ranking.", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 512, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { @@ -58790,7 +60320,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "1ae66267-9caf-4e1e-8c06-23c46cf1899c", + "id": "f9518f40-e138-489b-9c40-9660d158d958", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -58800,162 +60330,95 @@ "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": null, - "max_prompt_tokens": null, + "max_prompt_tokens": 4096, "max_tokens_per_image": null, "model": { - "author": "sentence-transformers", - "context_length": 8192, - "created_at": "2025-11-17T23:23:50.085215+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "The all-mpnet-base-v2 embedding model encodes sentences and short paragraphs into a 768-dimensional dense vector space, providing high-fidelity semantic embeddings well suited for tasks like information retrieval, clustering, similarity scoring, and text ranking.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/all-mpnet-base-v2", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Sentence Transformers: all-mpnet-base-v2", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/all-mpnet-base-v2-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "all-mpnet-base-v2", - "slug": "sentence-transformers/all-mpnet-base-v2", - "updated_at": "2025-11-18T00:09:06.924239+00:00", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "sentence-transformers/all-mpnet-base-v2-20251117", - "model_variant_slug": "sentence-transformers/all-mpnet-base-v2", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "DeepInfra | sentence-transformers/all-mpnet-base-v2-20251117", + "name": "Fireworks | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0", + "completion": "0.0000009000000000000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000045000000000000003", + "prompt": "0.0000009000000000000001" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "sentence-transformers/all-mpnet-base-v2", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/llama-v3p3-70b-instruct", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra", - "quantization": "unknown", + "provider_slug": "fireworks/fp16", + "quantization": "fp16", "supported_parameters": [ "max_tokens", "temperature", @@ -58963,11 +60426,13 @@ "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format" + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, @@ -58975,70 +60440,61 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/all-mpnet-base-v2", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Sentence Transformers: all-mpnet-base-v2", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/all-mpnet-base-v2-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "all-mpnet-base-v2", - "slug": "sentence-transformers/all-mpnet-base-v2", - "updated_at": "2025-11-18T00:09:06.924239+00:00", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "sentence-transformers", - "context_length": 512, - "created_at": "2025-11-18T02:02:19.770104+00:00", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.9 }, "default_stops": [], - "default_system": null, - "description": "The multi-qa-mpnet-base-dot-v1 embedding model transforms sentences and short paragraphs into a 768-dimensional dense vector space, generating high-quality semantic embeddings optimized for question-and-answer retrieval, semantic search, and similarity-scoring across diverse content.", + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 512, + "context_length": 204800, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "763c1e1e-10bf-43b8-82f5-a73847ef2d7f", + "id": "bc094db1-5408-4497-8d4d-09ebc3ac82c3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -59051,229 +60507,183 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sentence-transformers", - "context_length": 8192, - "created_at": "2025-11-18T02:02:19.770104+00:00", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.9 }, "default_stops": [], - "default_system": null, - "description": "The multi-qa-mpnet-base-dot-v1 embedding model transforms sentences and short paragraphs into a 768-dimensional dense vector space, generating high-quality semantic embeddings optimized for question-and-answer retrieval, semantic search, and similarity-scoring across diverse content.", + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "has_text_output": true, + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Sentence Transformers: multi-qa-mpnet-base-dot-v1", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117", + "name": "MiniMax: MiniMax M2.1", + "output_modalities": ["text"], + "permaslug": "minimax/minimax-m2.1", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "multi-qa-mpnet-base-dot-v1", - "slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", - "updated_at": "2025-11-18T02:47:07.243977+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117", - "model_variant_slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "model_variant_permaslug": "minimax/minimax-m2.1", + "model_variant_slug": "minimax/minimax-m2.1", "moderation_required": false, - "name": "DeepInfra | sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117", + "name": "Fireworks | minimax/minimax-m2.1", "pricing": { - "completion": "0", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.0000003" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "sentence-transformers/multi-qa-mpnet-base-dot-v1", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/minimax-m2p1", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra", + "provider_slug": "fireworks", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format" + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "has_text_output": true, + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Sentence Transformers: multi-qa-mpnet-base-dot-v1", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/multi-qa-mpnet-base-dot-v1-20251117", + "name": "MiniMax: MiniMax M2.1", + "output_modalities": ["text"], + "permaslug": "minimax/minimax-m2.1", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "multi-qa-mpnet-base-dot-v1", - "slug": "sentence-transformers/multi-qa-mpnet-base-dot-v1", - "updated_at": "2025-11-18T02:47:07.243977+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "sentence-transformers", - "context_length": 512, - "created_at": "2025-11-18T02:20:54.215887+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The paraphrase-MiniLM-L6-v2 embedding model converts sentences and short paragraphs into a 384-dimensional dense vector space, producing high-quality semantic embeddings optimized for paraphrase detection, semantic similarity scoring, clustering, and lightweight retrieval tasks.", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 512, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { @@ -59286,7 +60696,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "89d827bb-abe3-4bec-8956-2233d1bf5a2b", + "id": "86a689aa-4988-4f26-9c3b-514c03345f2a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -59299,19 +60709,14 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sentence-transformers", - "context_length": 8192, - "created_at": "2025-11-18T02:20:54.215887+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The paraphrase-MiniLM-L6-v2 embedding model converts sentences and short paragraphs into a 384-dimensional dense vector space, producing high-quality semantic embeddings optimized for paraphrase detection, semantic similarity scoring, clustering, and lightweight retrieval tasks.", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -59319,139 +60724,87 @@ } }, "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/paraphrase-MiniLM-L6-v2", + "has_text_output": true, + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Sentence Transformers: paraphrase-MiniLM-L6-v2", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/paraphrase-minilm-l6-v2-20251117", + "name": "MoonshotAI: Kimi K2 0905", + "output_modalities": ["text"], + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "paraphrase-MiniLM-L6-v2", - "slug": "sentence-transformers/paraphrase-minilm-l6-v2", - "updated_at": "2025-11-18T02:46:57.195578+00:00", - "warning_message": null + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null }, - "model_variant_permaslug": "sentence-transformers/paraphrase-minilm-l6-v2-20251117", - "model_variant_slug": "sentence-transformers/paraphrase-minilm-l6-v2", + "model_variant_permaslug": "moonshotai/kimi-k2-0905", + "model_variant_slug": "moonshotai/kimi-k2-0905", "moderation_required": false, - "name": "DeepInfra | sentence-transformers/paraphrase-minilm-l6-v2-20251117", + "name": "Fireworks | moonshotai/kimi-k2-0905", "pricing": { - "completion": "0", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000003", + "prompt": "0.0000006" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "sentence-transformers/paraphrase-MiniLM-L6-v2", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/kimi-k2-instruct-0905", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra", - "quantization": "unknown", + "provider_slug": "fireworks/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -59459,20 +60812,23 @@ "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format" + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -59480,31 +60836,31 @@ } }, "group": "Other", - "has_text_output": false, - "hf_slug": "sentence-transformers/paraphrase-MiniLM-L6-v2", + "has_text_output": true, + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Sentence Transformers: paraphrase-MiniLM-L6-v2", - "output_modalities": ["embeddings"], - "permaslug": "sentence-transformers/paraphrase-minilm-l6-v2-20251117", + "name": "MoonshotAI: Kimi K2 0905", + "output_modalities": ["text"], + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "paraphrase-MiniLM-L6-v2", - "slug": "sentence-transformers/paraphrase-minilm-l6-v2", - "updated_at": "2025-11-18T02:46:57.195578+00:00", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "thenlper", - "context_length": 512, - "created_at": "2025-11-18T02:43:40.915161+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -59512,19 +60868,20 @@ }, "default_stops": [], "default_system": null, - "description": "The gte-base embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, delivering efficient and effective semantic embeddings optimized for textual similarity, semantic search, and clustering applications.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 512, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -59534,7 +60891,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "4b1b4c07-7ad9-416c-b731-381e0c528c74", + "id": "2e3d0ec2-3384-498b-9654-5ae800e3d886", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -59547,9 +60904,9 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "thenlper", - "context_length": 8192, - "created_at": "2025-11-18T02:43:40.915161+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -59557,165 +60914,115 @@ }, "default_stops": [], "default_system": null, - "description": "The gte-base embedding model encodes English sentences and paragraphs into a 768-dimensional dense vector space, delivering efficient and effective semantic embeddings optimized for textual similarity, semantic search, and clustering applications.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "features": { "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, "group": "Other", - "has_text_output": false, - "hf_slug": "thenlper/gte-base", + "has_text_output": true, + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Thenlper: GTE-Base", - "output_modalities": ["embeddings"], - "permaslug": "thenlper/gte-base-20251117", + "name": "MoonshotAI: Kimi K2.5", + "output_modalities": ["text"], + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "GTE-Base", - "slug": "thenlper/gte-base", - "updated_at": "2025-11-18T16:27:19.105717+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "thenlper/gte-base-20251117", - "model_variant_slug": "thenlper/gte-base", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "DeepInfra | thenlper/gte-base-20251117", + "name": "Fireworks | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000010000000000000001", + "prompt": "0.0000006" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "thenlper/gte-base", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/kimi-k2p5", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra", + "provider_slug": "fireworks", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", - "temperature", - "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format" + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, @@ -59723,36 +61030,34 @@ "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, "group": "Other", - "has_text_output": false, - "hf_slug": "thenlper/gte-base", + "has_text_output": true, + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Thenlper: GTE-Base", - "output_modalities": ["embeddings"], - "permaslug": "thenlper/gte-base-20251117", + "name": "MoonshotAI: Kimi K2.5", + "output_modalities": ["text"], + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "GTE-Base", - "slug": "thenlper/gte-base", - "updated_at": "2025-11-18T16:27:19.105717+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { - "author": "thenlper", - "context_length": 512, - "created_at": "2025-11-18T02:40:55.377007+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -59760,19 +61065,25 @@ }, "default_stops": [], "default_system": null, - "description": "The gte-large embedding model converts English sentences, paragraphs and moderate-length documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for information retrieval, semantic textual similarity, reranking and clustering tasks. Trained via multi-stage contrastive learning on a large domain-diverse relevance corpus, it offers excellent performance across general-purpose embedding use-cases.", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 512, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -59782,7 +61093,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "31f228a6-f183-4617-950d-7c4a14d74acd", + "id": "0d478558-8c4b-4f0c-b3b0-871c959225ae", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -59795,9 +61106,9 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "thenlper", - "context_length": 8192, - "created_at": "2025-11-18T02:40:55.377007+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -59805,218 +61116,175 @@ }, "default_stops": [], "default_system": null, - "description": "The gte-large embedding model converts English sentences, paragraphs and moderate-length documents into a 1024-dimensional dense vector space, delivering high-quality semantic embeddings optimized for information retrieval, semantic textual similarity, reranking and clustering tasks. Trained via multi-stage contrastive learning on a large domain-diverse relevance corpus, it offers excellent performance across general-purpose embedding use-cases.", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", - "has_text_output": false, - "hf_slug": "thenlper/gte-large", + "group": "GPT", + "has_text_output": true, + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Thenlper: GTE-Large", - "output_modalities": ["embeddings"], - "permaslug": "thenlper/gte-large-20251117", + "name": "OpenAI: gpt-oss-120b", + "output_modalities": ["text"], + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GTE-Large", - "slug": "thenlper/gte-large", - "updated_at": "2025-11-18T16:28:48.144005+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "thenlper/gte-large-20251117", - "model_variant_slug": "thenlper/gte-large", + "model_variant_permaslug": "openai/gpt-oss-120b", + "model_variant_slug": "openai/gpt-oss-120b", "moderation_required": false, - "name": "DeepInfra | thenlper/gte-large-20251117", + "name": "Fireworks | openai/gpt-oss-120b", "pricing": { - "completion": "0", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000075", + "prompt": "0.00000015" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "thenlper/gte-large", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/gpt-oss-120b", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra", + "provider_slug": "fireworks", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format" + "repetition_penalty", + "logit_bias", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", - "has_text_output": false, - "hf_slug": "thenlper/gte-large", + "group": "GPT", + "has_text_output": true, + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Thenlper: GTE-Large", - "output_modalities": ["embeddings"], - "permaslug": "thenlper/gte-large-20251117", + "name": "OpenAI: gpt-oss-120b", + "output_modalities": ["text"], + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GTE-Large", - "slug": "thenlper/gte-large", - "updated_at": "2025-11-18T16:28:48.144005+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "microsoft", - "context_length": 65536, - "created_at": "2024-04-16T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["USER:", ""], + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 65536, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -60026,7 +61294,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "03ac4ad1-a230-4ce7-821c-e797305733df", + "id": "c811d748-bd32-44ec-b6f5-abc4b87c7381", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -60035,210 +61303,189 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "microsoft", - "context_length": 65536, - "created_at": "2024-04-16T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["USER:", ""], + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", - "features": {}, - "group": "Mistral", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "microsoft/WizardLM-2-8x22B", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "vicuna", + "instruct_type": null, "model_version_group_id": null, - "name": "WizardLM-2 8x22B", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "microsoft/wizardlm-2-8x22b", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-20b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "WizardLM-2 8x22B", - "slug": "microsoft/wizardlm-2-8x22b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "microsoft/wizardlm-2-8x22b", - "model_variant_slug": "microsoft/wizardlm-2-8x22b", + "model_variant_permaslug": "openai/gpt-oss-20b", + "model_variant_slug": "openai/gpt-oss-20b", "moderation_required": false, - "name": "DeepInfra | microsoft/wizardlm-2-8x22b", + "name": "Fireworks | openai/gpt-oss-20b", "pricing": { - "completion": "0.00000048", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000048", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000035", + "prompt": "0.00000007" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "microsoft/WizardLM-2-8x22B", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/gpt-oss-20b", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra/bf16", - "quantization": "bf16", + "provider_slug": "fireworks", + "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format" + "repetition_penalty", + "logit_bias", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "microsoft/WizardLM-2-8x22B", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "vicuna", + "instruct_type": null, "model_version_group_id": null, - "name": "WizardLM-2 8x22B", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "microsoft/wizardlm-2-8x22b", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-20b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "WizardLM-2 8x22B", - "slug": "microsoft/wizardlm-2-8x22b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-07-25T19:22:27.278283+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, + "author": "qwen", + "context_length": 128000, + "created_at": "2025-03-24T18:10:38.542849+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -60248,7 +61495,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "1323a290-9c85-4609-89af-d4c085e246c6", + "id": "aac76f56-eedb-405f-a07c-5bc66b877c1e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -60261,238 +61508,153 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-07-25T19:22:27.278283+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, + "author": "qwen", + "context_length": 32768, + "created_at": "2025-03-24T18:10:38.542849+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5", + "hf_slug": "Qwen/Qwen2.5-VL-32B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5", + "name": "Qwen: Qwen2.5 VL 32B Instruct", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-32b-instruct", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5", - "slug": "z-ai/glm-4.5", - "updated_at": "2026-01-05T22:04:10.598351+00:00", + "short_name": "Qwen2.5 VL 32B Instruct", + "slug": "qwen/qwen2.5-vl-32b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.5", - "model_variant_slug": "z-ai/glm-4.5", + "model_variant_permaslug": "qwen/qwen2.5-vl-32b-instruct", + "model_variant_slug": "qwen/qwen2.5-vl-32b-instruct", "moderation_required": false, - "name": "DeepInfra | z-ai/glm-4.5", + "name": "Fireworks | qwen/qwen2.5-vl-32b-instruct", "pricing": { - "completion": "0.0000016", + "completion": "0.0000009000000000000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000038", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000045000000000000003", + "prompt": "0.0000009000000000000001" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "zai-org/GLM-4.5", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra/fp8", - "quantization": "fp8", + "provider_slug": "fireworks", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format" + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5", + "hf_slug": "Qwen/Qwen2.5-VL-32B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5", + "name": "Qwen: Qwen2.5 VL 32B Instruct", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-32b-instruct", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5", - "slug": "z-ai/glm-4.5", - "updated_at": "2026-01-05T22:04:10.598351+00:00", + "short_name": "Qwen2.5 VL 32B Instruct", + "slug": "qwen/qwen2.5-vl-32b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 202752, - "created_at": "2025-09-30T12:32:56.306946+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 131072, + "created_at": "2025-04-28T21:29:17.25671+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 202752, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -60502,7 +61664,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c8607edc-8577-4199-b03d-1213da4743dc", + "id": "d88007ff-3317-4c16-8257-49bfab63abd9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -60515,160 +61677,100 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 200000, - "created_at": "2025-09-30T12:32:56.306946+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 131072, + "created_at": "2025-04-28T21:29:17.25671+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-235B-A22B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Qwen: Qwen3 235B A22B", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "qwen/qwen3-235b-a22b-04-28", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "Qwen3 235B A22B", + "slug": "qwen/qwen3-235b-a22b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6", - "model_variant_slug": "z-ai/glm-4.6", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-04-28", + "model_variant_slug": "qwen/qwen3-235b-a22b", "moderation_required": false, - "name": "DeepInfra | z-ai/glm-4.6", + "name": "Fireworks | qwen/qwen3-235b-a22b-04-28", "pricing": { - "completion": "0.00000175", + "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000000799999993", - "internal_reasoning": "0", - "prompt": "0.00000043", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000011", + "prompt": "0.00000022" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "zai-org/GLM-4.6", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/qwen3-235b-a22b", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra/fp4", - "quantization": "fp4", + "provider_slug": "fireworks", + "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", @@ -60678,11 +61780,13 @@ "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", "response_format", + "structured_outputs", "tools", "tool_choice" ], @@ -60693,325 +61797,256 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-235B-A22B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Qwen: Qwen3 235B A22B", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "qwen/qwen3-235b-a22b-04-28", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "Qwen3 235B A22B", + "slug": "qwen/qwen3-235b-a22b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-12-08T15:24:22.464154+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.6 - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { - "supports_base64_video_input": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": false + } }, "has_chat_completions": true, "has_completions": true, - "id": "118b9f16-32ae-4579-b723-815eb609bde5", + "id": "1ad1ddae-5e60-4396-bfea-a2ad2fc6ed33", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 110, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-12-08T15:24:22.464154+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.6 - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.6V", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6V", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6-20251208", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.6V", - "slug": "z-ai/glm-4.6v", - "updated_at": "2025-12-08T15:45:24.970322+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6-20251208", - "model_variant_slug": "z-ai/glm-4.6v", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "DeepInfra | z-ai/glm-4.6-20251208", + "name": "Fireworks | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.0000009", + "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000011", + "prompt": "0.00000022" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "zai-org/GLM-4.6V", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra/fp8", + "provider_slug": "fireworks/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", - "response_format", + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", "tools", - "tool_choice", - "structured_outputs" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.6V", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6V", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6-20251208", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.6V", - "slug": "z-ai/glm-4.6v", - "updated_at": "2025-12-08T15:45:24.970322+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 202752, - "created_at": "2025-12-22T04:33:34.884504+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-23T00:29:06+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "endpoint": { - "adapter_name": "DeepInfraAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 202752, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": false, - "literal_required": false, - "type_function": false + "literal_none": true, + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "ab79ffa8-8dcc-41c4-9a9f-6cc408135243", + "id": "4117a5d2-961b-4065-99dd-554484dc6443", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -61024,257 +62059,178 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 200000, - "created_at": "2025-12-22T04:33:34.884504+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, + "author": "qwen", + "context_length": 1048576, + "created_at": "2025-07-23T00:29:06+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.7-20251222", - "model_variant_slug": "z-ai/glm-4.7", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "model_variant_slug": "qwen/qwen3-coder", "moderation_required": false, - "name": "DeepInfra | z-ai/glm-4.7-20251222", + "name": "Fireworks | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { - "completion": "0.00000175", + "completion": "0.0000018000000000000001", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000000799999993", - "internal_reasoning": "0", - "prompt": "0.00000043", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000022500000000000002", + "prompt": "0.00000045000000000000003" }, - "provider_display_name": "DeepInfra", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepInfraAdapter", - "baseUrl": "https://api.deepinfra.com/v1/openai", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://deepinfra.com/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://deepinfra.com/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "DeepInfra", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/DeepInfra.webp" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "anthropic/claude-4-opus", - "anthropic/claude-4-sonnet", - "deepseek-ai/DeepSeek-R1-0528-Turbo", - "meta-llama/Llama-2-70b-chat-hf", - "mistralai/Mixtral-8x22B-Instruct-v0.1", - "google/gemma-1.1-7b-it", - "microsoft/Phi-3-medium-4k-instruct", - "google/gemma-2-27b-it", - "microsoft/WizardLM-2-7B", - "mattshumer/Reflection-Llama-3.1-70B", - "Sao10K/L3-8B-Lunaris-v1", - "openbmb/MiniCPM-Llama3-V-2_5", - "Qwen/QVQ-72B-Preview", - "deepinfra/airoboros-70b", - "Qwen/QwQ-32B-Preview", - "Phind/Phind-CodeLlama-34B-v2", - "lizpreciatior/lzlv_70b_fp16_hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "cognitivecomputations/dolphin-2.6-mixtral-8x7b", - "cognitivecomputations/dolphin-2.9.1-llama-3-70b", - "Qwen/Qwen2-72B-Instruct", - "Qwen/Qwen2-7B-Instruct", - "google/gemma-2-9b-it", - "Sao10K/L3-70B-Euryale-v2.1", - "google/codegemma-7b-it", - "mistralai/Mistral-7B-Instruct-v0.1", - "KoboldAI/LLaMA2-13B-Tiefighter", - "meta-llama/Llama-2-13b-chat-hf", - "openchat/openchat_3.5", - "openchat/openchat-3.6-8b", - "bigcode/starcoder2-15b-instruct-v0.1", - "Gryphe/MythoMax-L2-13b-turbo", - "Austism/chronos-hermes-13b-v2", - "Qwen/Qwen2.5-Coder-7B", - "moonshotai/Kimi-K2-Instruct", - "google/gemini-1.5-flash", - "google/gemini-2.5-flash", - "google/gemini-2.0-flash-001", - "anthropic/claude-3-7-sonnet-latest", - "google/gemini-1.5-flash-8b", - "google/gemini-2.5-pro", - "NovaSky-AI/Sky-T1-32B-Preview", - "allenai/olmOCR-7B-0725-FP8", - "allenai/olmOCR-7B-0825", - "deepseek-ai/DeepSeek-V3-0324-Turbo", - "PaddlePaddle/PaddleOCR-VL-0.9B", - "allenai/olmOCR-7B-1025", - "allenai/olmOCR-2-7B-1025", - "allenai/olmOCR-2", - "deepseek-ai/DeepSeek-OCR", - "meta-llama/Llama-3.2-1B-Instruct", - "sentence-transformers/clip-ViT-B-32-multilingual-v1", - "shibing624/text2vec-base-chinese", - "sentence-transformers/clip-ViT-B-32", - "BAAI/bge-en-icl", - "Qwen/Qwen3-Embedding-8B-batch", - "Qwen/Qwen3-Embedding-4B-batch", - "mistralai/Mistral-Small-3.1-24B-Instruct-2503", - "meta-llama/Meta-Llama-3.1-405B-Instruct", - "BAAI/bge-m3-multi", - "google/embeddinggemma-300m", - "Qwen/Qwen3-Embedding-0.6B-batch", - "meta-llama/Llama-4-Maverick-17B-128E-Instruct-Turbo" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepInfra", - "owners": ["{}"], - "slug": "deepinfra", - "statusPageUrl": "https://status.deepinfra.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "zai-org/GLM-4.7", - "provider_name": "DeepInfra", + "provider_model_id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepinfra/fp4", - "quantization": "fp4", + "provider_slug": "fireworks", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "seed", - "min_p", + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", "response_format", + "structured_outputs", "tools", - "tool_choice", - "structured_outputs" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "DeepInfra", - "slug": "deepinfra" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": true - }, - "displayName": "DeepSeek", - "headquarters": "CN", - "icon": { - "url": "/images/icons/DeepSeek.png" - }, - "models": [ + }, { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-12-01T13:10:42.818885+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-23T23:04:47+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "endpoint": { - "adapter_name": "DeepSeekAdapter", + "adapter_name": "FireworksAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://chat.deepseek.com/downloads/DeepSeek%20Privacy%20Policy.html", - "retainsPrompts": true, - "termsOfServiceURL": "https://chat.deepseek.com/downloads/DeepSeek%20Terms%20of%20Use.html", - "training": true + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", - "supports_implicit_caching": true, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -61284,7 +62240,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "684ad760-951b-4f00-a25f-8a5c85e979d6", + "id": "420bd9d7-e714-487a-828a-ac4d279bf18e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -61293,188 +62249,186 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 64000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", + "author": "qwen", "context_length": 131072, - "created_at": "2025-12-01T13:10:42.818885+00:00", + "created_at": "2025-09-23T23:04:47+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", - "model_variant_slug": "deepseek/deepseek-v3.2", + "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", + "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", "moderation_required": false, - "name": "DeepSeek | deepseek/deepseek-v3.2-20251201", + "name": "Fireworks | qwen/qwen3-vl-235b-a22b-instruct", "pricing": { - "completion": "0.00000042", + "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000028", - "internal_reasoning": "0", - "prompt": "0.00000028", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000011", + "prompt": "0.00000022" }, - "provider_display_name": "DeepSeek", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "DeepSeekAdapter", - "baseUrl": "https://api.deepseek.com/beta", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://chat.deepseek.com/downloads/DeepSeek%20Privacy%20Policy.html", - "retainsPrompts": true, - "termsOfServiceURL": "https://chat.deepseek.com/downloads/DeepSeek%20Terms%20of%20Use.html", - "training": true + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "training": false }, - "displayName": "DeepSeek", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "CN", + "headquarters": "US", "icon": { - "url": "/images/icons/DeepSeek.png" + "url": "/images/icons/Fireworks.png" }, - "ignoredProviderModels": ["deepseek-reasoner"], + "ignoredProviderModels": [ + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" + ], "isAbortable": true, - "isMultipartSupported": false, + "isMultipartSupported": true, "moderationRequired": false, - "name": "DeepSeek", - "owners": ["{}"], - "slug": "deepseek", - "statusPageUrl": "https://status.deepseek.com/" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "deepseek-chat", - "provider_name": "DeepSeek", + "provider_model_id": "accounts/fireworks/models/qwen3-vl-235b-a22b-instruct", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "deepseek", + "provider_slug": "fireworks", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", + "top_k", + "repetition_penalty", + "logit_bias", "logprobs", "top_logprobs", + "response_format", + "structured_outputs", "tools", - "tool_choice", - "response_format" + "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": true, + "supports_multipart": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "DeepSeek", - "slug": "deepseek" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Featherless", - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://featherless.ai/&size=256" - }, - "models": [ + }, { - "author": "alfredpros", - "context_length": 4096, - "created_at": "2025-04-14T14:44:34.216191+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "qwen", + "context_length": 262144, + "created_at": "2025-10-06T23:47:56.430294+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.7, + "top_p": 0.8 + }, + "default_stops": [], "default_system": null, - "description": "A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 4096, + "adapter_name": "FireworksAdapter", + "can_abort": true, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://featherless.ai/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://featherless.ai/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -61484,7 +62438,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "7b1e72cd-c414-4ed4-b277-2edaddbf97a0", + "id": "dbb294c1-20cd-4d67-b225-2ff03a050cc8", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -61493,88 +62447,110 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "alfredpros", - "context_length": 4096, - "created_at": "2025-04-14T14:44:34.216191+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "qwen", + "context_length": 262144, + "created_at": "2025-10-06T23:47:56.430294+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.7, + "top_p": 0.8 + }, + "default_stops": [], "default_system": null, - "description": "A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.", - "features": {}, - "group": "Other", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "AlfredPros/CodeLlama-7b-Instruct-Solidity", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "alpaca", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "alfredpros/codellama-7b-instruct-solidity", - "reasoning_config": null, + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "CodeLLaMa 7B Instruct Solidity", - "slug": "alfredpros/codellama-7b-instruct-solidity", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "alfredpros/codellama-7b-instruct-solidity", - "model_variant_slug": "alfredpros/codellama-7b-instruct-solidity", + "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "model_variant_slug": "qwen/qwen3-vl-30b-a3b-instruct", "moderation_required": false, - "name": "Featherless | alfredpros/codellama-7b-instruct-solidity", + "name": "Fireworks | qwen/qwen3-vl-30b-a3b-instruct", "pricing": { - "completion": "0.0000012", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000075", + "prompt": "0.00000015" }, - "provider_display_name": "Featherless", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.featherless.ai/v1", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://featherless.ai/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://featherless.ai/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "Featherless", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://featherless.ai/&size=256" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "featherless-ai/Qwerky-72B", - "AlfredPros/CodeLlama-7b-Instruct-Solidity", - "EleutherAI/llemma_7b", - "featherless-ai/QRWKV-72B" - ], - "isAbortable": false, - "isMultipartSupported": false, - "moderationRequired": false, - "name": "Featherless", - "owners": ["{}"], - "slug": "featherless", - "statusPageUrl": "https://featherless.ai/status" - }, - "provider_model_id": "AlfredPros/CodeLlama-7b-Instruct-Solidity", - "provider_name": "Featherless", + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" + }, + "provider_model_id": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "featherless", + "provider_slug": "fireworks", "quantization": "unknown", "supported_parameters": [ "max_tokens", @@ -61583,57 +62559,81 @@ "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "min_p", - "seed" + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], - "supports_multipart": false, + "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Other", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "AlfredPros/CodeLlama-7b-Instruct-Solidity", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "alpaca", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "AlfredPros: CodeLLaMa 7B Instruct Solidity", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "alfredpros/codellama-7b-instruct-solidity", - "reasoning_config": null, + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "CodeLLaMa 7B Instruct Solidity", - "slug": "alfredpros/codellama-7b-instruct-solidity", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "eleutherai", - "context_length": 4096, - "created_at": "2025-04-14T15:07:05.530993+00:00", - "default_parameters": {}, - "default_stops": ["Source: assistant", ""], + "author": "z-ai", + "context_length": 202752, + "created_at": "2025-09-30T12:32:56.306946+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens. Llemma models are particularly strong at chain-of-thought mathematical reasoning and using computational tools for mathematics, such as Python and formal theorem provers.", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 4096, + "adapter_name": "FireworksAdapter", + "can_abort": true, + "context_length": 202752, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://featherless.ai/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://featherless.ai/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -61643,7 +62643,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "eb965d1b-6176-49cd-a4ea-c8bae8bb8f1d", + "id": "d64561f9-4884-4c7d-875b-69ce3b5c8b98", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -61652,166 +62652,203 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "eleutherai", - "context_length": 4096, - "created_at": "2025-04-14T15:07:05.530993+00:00", - "default_parameters": {}, - "default_stops": ["Source: assistant", ""], + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-09-30T12:32:56.306946+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Llemma 7B is a language model for mathematics. It was initialized with Code Llama 7B weights, and trained on the Proof-Pile-2 for 200B tokens. Llemma models are particularly strong at chain-of-thought mathematical reasoning and using computational tools for mathematics, such as Python and formal theorem provers.", - "features": {}, + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "EleutherAI/llemma_7b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "code-llama", + "instruct_type": null, "model_version_group_id": null, - "name": "EleutherAI: Llemma 7b", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "eleutherai/llemma_7b", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.6", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llemma 7b", - "slug": "eleutherai/llemma_7b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, - "model_variant_permaslug": "eleutherai/llemma_7b", - "model_variant_slug": "eleutherai/llemma_7b", + "model_variant_permaslug": "z-ai/glm-4.6", + "model_variant_slug": "z-ai/glm-4.6", "moderation_required": false, - "name": "Featherless | eleutherai/llemma_7b", + "name": "Fireworks | z-ai/glm-4.6", "pricing": { - "completion": "0.0000012", + "completion": "0.0000021899999999999998", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000275", + "prompt": "0.00000055" }, - "provider_display_name": "Featherless", + "provider_display_name": "Fireworks", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.featherless.ai/v1", + "adapterName": "FireworksAdapter", + "baseUrl": "https://api.fireworks.ai/inference/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://featherless.ai/privacy", + "privacyPolicyURL": "https://fireworks.ai/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://featherless.ai/terms", + "termsOfServiceURL": "https://fireworks.ai/terms-of-service", "training": false }, - "displayName": "Featherless", + "displayName": "Fireworks", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://featherless.ai/&size=256" + "url": "/images/icons/Fireworks.png" }, "ignoredProviderModels": [ - "featherless-ai/Qwerky-72B", - "AlfredPros/CodeLlama-7b-Instruct-Solidity", - "EleutherAI/llemma_7b", - "featherless-ai/QRWKV-72B" + "accounts/fireworks/models/qwen2-vl-72b-instruct", + "accounts/fireworks/models/llama4-scout-instruct-basic", + "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", + "accounts/fireworks/models/qwen3-embedding-8b", + "accounts/fireworks/models/glm-4p5-air", + "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", + "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", + "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", + "accounts/perplexity/models/r1-1776", + "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", + "accounts/fireworks/models/qwen3-reranker-8b", + "accounts/fireworks/models/deepseek-v3p1-terminus", + "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", + "accounts/fireworks/models/deepseek-r1-basic", + "accounts/cogito/models/cogito-671b-v2-p1" ], - "isAbortable": false, - "isMultipartSupported": false, + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "Featherless", - "owners": ["{}"], - "slug": "featherless", - "statusPageUrl": "https://featherless.ai/status" + "name": "Fireworks", + "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], + "slug": "fireworks", + "statusPageUrl": "https://status.fireworks.ai/" }, - "provider_model_id": "EleutherAI/llemma_7b", - "provider_name": "Featherless", + "provider_model_id": "accounts/fireworks/models/glm-4p6", + "provider_name": "Fireworks", "provider_region": null, - "provider_slug": "featherless", + "provider_slug": "fireworks", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", "top_k", - "min_p", - "seed" + "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", + "tools", + "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "EleutherAI/llemma_7b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "code-llama", + "instruct_type": null, "model_version_group_id": null, - "name": "EleutherAI: Llemma 7b", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "eleutherai/llemma_7b", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.6", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llemma 7b", - "slug": "eleutherai/llemma_7b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null } ], - "name": "Featherless", - "slug": "featherless" + "name": "Fireworks", + "slug": "fireworks" }, { "dataPolicy": { "canPublish": false, - "retainsPrompts": false, + "retainsPrompts": true, "training": false }, - "displayName": "Fireworks", + "displayName": "Friendli", "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" }, "models": [ { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "FriendliAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -61821,7 +62858,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cbe2855c-ea77-474f-9190-11d51fc4921e", + "id": "986e98d3-8e2b-4943-b01b-b5c17f2ade41", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -61830,115 +62867,98 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 8000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", + "author": "meta-llama", "context_length": 131072, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "created_at": "2024-07-23T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "features": {}, - "group": "DeepSeek", + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", + "permaslug": "meta-llama/llama-3.1-8b-instruct", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", - "model_variant_slug": "deepseek/deepseek-chat-v3-0324", + "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", "moderation_required": false, - "name": "Fireworks | deepseek/deepseek-chat-v3-0324", + "name": "Friendli | meta-llama/llama-3.1-8b-instruct", "pricing": { - "completion": "0.0000009000000000000001", + "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000009000000000000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Friendli", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "FriendliAdapter", + "baseUrl": "https://api.friendli.ai/serverless/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, - "displayName": "Fireworks", + "displayName": "Friendli", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "LGAI-EXAONE/EXAONE-4.0-32B", + "LGAI-EXAONE/EXAONE-4.0.1-32B", + "LGAI-EXAONE/K-EXAONE-236B-A23B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Friendli", + "owners": ["{}"], + "slug": "friendli", + "statusPageUrl": "https://status.friendli.ai/" }, - "provider_model_id": "accounts/fireworks/models/deepseek-v3-0324", - "provider_name": "Fireworks", + "provider_model_id": "meta-llama-3.1-8b-instruct", + "provider_name": "Friendli", "provider_region": null, - "provider_slug": "fireworks", + "provider_slug": "friendli", "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", + "seed", "top_k", + "min_p", "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", "tools", - "tool_choice" + "tool_choice", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, @@ -61947,46 +62967,45 @@ "variant": "standard" }, "features": {}, - "group": "DeepSeek", + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", + "permaslug": "meta-llama/llama-3.1-8b-instruct", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-08-21T12:33:48+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "FriendliAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, "features": { "supported_parameters": {}, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -61996,7 +63015,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0c00f0d2-8aad-4700-bd36-11cb9a717fde", + "id": "b5f4db53-f8da-406b-83bc-2929f11eef13", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -62005,183 +63024,149 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", + "author": "meta-llama", "context_length": 131072, - "created_at": "2025-08-21T12:33:48+00:00", + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", - "model_variant_slug": "deepseek/deepseek-chat-v3.1", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "Fireworks | deepseek/deepseek-chat-v3.1", + "name": "Friendli | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.00000168", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000056", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Friendli", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "FriendliAdapter", + "baseUrl": "https://api.friendli.ai/serverless/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, - "displayName": "Fireworks", + "displayName": "Friendli", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "LGAI-EXAONE/EXAONE-4.0-32B", + "LGAI-EXAONE/EXAONE-4.0.1-32B", + "LGAI-EXAONE/K-EXAONE-236B-A23B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Friendli", + "owners": ["{}"], + "slug": "friendli", + "statusPageUrl": "https://status.friendli.ai/" }, - "provider_model_id": "accounts/fireworks/models/deepseek-v3p1", - "provider_name": "Fireworks", + "provider_model_id": "meta-llama-3.3-70b-instruct", + "provider_name": "Friendli", "provider_region": null, - "provider_slug": "fireworks", + "provider_slug": "friendli", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", + "seed", "top_k", + "min_p", "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "response_format", - "structured_outputs", "tools", - "tool_choice" + "tool_choice", + "structured_outputs", + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", + "context_length": 447376, + "created_at": "2025-04-05T19:31:59.735804+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "FriendliAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 447376, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_multipart": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -62191,7 +63176,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f9518f40-e138-489b-9c40-9660d158d958", + "id": "0ee7adf4-f9e5-47e7-b4c7-be696689a546", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -62200,142 +63185,125 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, - "max_prompt_tokens": 4096, + "max_completion_tokens": 447376, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", + "context_length": 10000000, + "created_at": "2025-04-05T19:31:59.735804+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", "features": {}, - "group": "Llama3", + "group": "Llama4", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "model_variant_slug": "meta-llama/llama-4-scout", "moderation_required": false, - "name": "Fireworks | meta-llama/llama-3.3-70b-instruct", + "name": "Friendli | meta-llama/llama-4-scout-17b-16e-instruct", "pricing": { - "completion": "0.0000009000000000000001", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000009000000000000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Friendli", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "FriendliAdapter", + "baseUrl": "https://api.friendli.ai/serverless/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, - "displayName": "Fireworks", + "displayName": "Friendli", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "LGAI-EXAONE/EXAONE-4.0-32B", + "LGAI-EXAONE/EXAONE-4.0.1-32B", + "LGAI-EXAONE/K-EXAONE-236B-A23B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Friendli", + "owners": ["{}"], + "slug": "friendli", + "statusPageUrl": "https://status.friendli.ai/" }, - "provider_model_id": "accounts/fireworks/models/llama-v3p3-70b-instruct", - "provider_name": "Fireworks", + "provider_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "provider_name": "Friendli", "provider_region": null, - "provider_slug": "fireworks/fp16", - "quantization": "fp16", + "provider_slug": "friendli", + "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", + "seed", "top_k", - "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "response_format", - "structured_outputs" + "min_p", + "repetition_penalty" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Llama4", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "minimax", - "context_length": 204800, + "context_length": 196608, "created_at": "2025-12-23T01:56:37+00:00", "default_parameters": { "frequency_penalty": null, @@ -62346,29 +63314,28 @@ "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "FriendliAdapter", "can_abort": true, - "context_length": 204800, + "context_length": 196608, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, "features": { - "is_mandatory_reasoning": true, "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": false, - "literal_required": true, + "literal_required": false, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "bc094db1-5408-4497-8d4d-09ebc3ac82c3", + "id": "521d5e70-7f6e-4337-ab9d-9b8f11bd2b86", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -62377,7 +63344,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 196608, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -62427,65 +63394,49 @@ "model_variant_permaslug": "minimax/minimax-m2.1", "model_variant_slug": "minimax/minimax-m2.1", "moderation_required": false, - "name": "Fireworks | minimax/minimax-m2.1", + "name": "Friendli | minimax/minimax-m2.1", "pricing": { "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Friendli", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "FriendliAdapter", + "baseUrl": "https://api.friendli.ai/serverless/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, - "displayName": "Fireworks", + "displayName": "Friendli", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "LGAI-EXAONE/EXAONE-4.0-32B", + "LGAI-EXAONE/EXAONE-4.0.1-32B", + "LGAI-EXAONE/K-EXAONE-236B-A23B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Friendli", + "owners": ["{}"], + "slug": "friendli", + "statusPageUrl": "https://status.friendli.ai/" }, - "provider_model_id": "accounts/fireworks/models/minimax-m2p1", - "provider_name": "Fireworks", + "provider_model_id": "MiniMaxAI/MiniMax-M2.1", + "provider_name": "Friendli", "provider_region": null, - "provider_slug": "fireworks", + "provider_slug": "friendli", "quantization": "unknown", "supported_parameters": [ "reasoning", @@ -62496,15 +63447,14 @@ "stop", "frequency_penalty", "presence_penalty", + "seed", "top_k", + "min_p", "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "response_format", - "structured_outputs", "tools", - "tool_choice" + "tool_choice", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, @@ -62545,22 +63495,22 @@ "warning_message": null }, { - "author": "moonshotai", + "author": "qwen", "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "FriendliAdapter", "can_abort": true, "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, "features": { @@ -62573,7 +63523,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "86a689aa-4988-4f26-9c3b-514c03345f2a", + "id": "02b90f2a-e684-46b3-808b-eb88e1348e29", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -62582,17 +63532,17 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", + "author": "qwen", "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "features": { "reasoning_config": { "end_token": null, @@ -62600,91 +63550,75 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "Fireworks | moonshotai/kimi-k2-0905", + "name": "Friendli | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.0000025", + "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Friendli", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "FriendliAdapter", + "baseUrl": "https://api.friendli.ai/serverless/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, - "displayName": "Fireworks", + "displayName": "Friendli", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "LGAI-EXAONE/EXAONE-4.0-32B", + "LGAI-EXAONE/EXAONE-4.0.1-32B", + "LGAI-EXAONE/K-EXAONE-236B-A23B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Friendli", + "owners": ["{}"], + "slug": "friendli", + "statusPageUrl": "https://status.friendli.ai/" }, - "provider_model_id": "accounts/fireworks/models/kimi-k2-instruct-0905", - "provider_name": "Fireworks", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "provider_name": "Friendli", "provider_region": null, - "provider_slug": "fireworks/fp8", - "quantization": "fp8", + "provider_slug": "friendli", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -62692,15 +63626,13 @@ "stop", "frequency_penalty", "presence_penalty", + "seed", "top_k", + "min_p", "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "response_format", - "structured_outputs", "tools", - "tool_choice" + "tool_choice", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, @@ -62715,58 +63647,52 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-25T13:19:17.179049+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "FriendliAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -62776,7 +63702,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0d478558-8c4b-4f0c-b3b0-871c959225ae", + "id": "f49b0831-ff8f-458a-bcf5-7401c3a50ac6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -62785,21 +63711,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-25T13:19:17.179049+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -62808,90 +63734,74 @@ "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", "moderation_required": false, - "name": "Fireworks | openai/gpt-oss-120b", + "name": "Friendli | qwen/qwen3-235b-a22b-thinking-2507", "pricing": { - "completion": "0.0000006", + "completion": "0.0000024", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Friendli", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "FriendliAdapter", + "baseUrl": "https://api.friendli.ai/serverless/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, - "displayName": "Fireworks", + "displayName": "Friendli", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "LGAI-EXAONE/EXAONE-4.0-32B", + "LGAI-EXAONE/EXAONE-4.0.1-32B", + "LGAI-EXAONE/K-EXAONE-236B-A23B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Friendli", + "owners": ["{}"], + "slug": "friendli", + "statusPageUrl": "https://status.friendli.ai/" }, - "provider_model_id": "accounts/fireworks/models/gpt-oss-120b", - "provider_name": "Fireworks", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "provider_name": "Friendli", "provider_region": null, - "provider_slug": "fireworks", + "provider_slug": "friendli", "quantization": "unknown", "supported_parameters": [ "reasoning", @@ -62902,11 +63812,14 @@ "stop", "frequency_penalty", "presence_penalty", + "seed", "top_k", + "min_p", "repetition_penalty", - "logit_bias", "tools", - "tool_choice" + "tool_choice", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, @@ -62922,55 +63835,56 @@ "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, { - "author": "openai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "created_at": "2025-04-28T22:16:44.177326+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "FriendliAdapter", "can_abort": true, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": {}, - "supports_input_audio": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -62980,7 +63894,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c811d748-bd32-44ec-b6f5-abc4b87c7381", + "id": "6a005fee-4205-438d-a938-11f35a039fd3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -62989,130 +63903,113 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "created_at": "2025-04-28T22:16:44.177326+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-30B-A3B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 30B A3B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-30b-a3b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 30B A3B", + "slug": "qwen/qwen3-30b-a3b", + "updated_at": "2026-01-08T19:57:57.475571+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", + "model_variant_slug": "qwen/qwen3-30b-a3b", "moderation_required": false, - "name": "Fireworks | openai/gpt-oss-20b", + "name": "Friendli | qwen/qwen3-30b-a3b-04-28", "pricing": { - "completion": "0.0000003", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000007", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Friendli", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "FriendliAdapter", + "baseUrl": "https://api.friendli.ai/serverless/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, - "displayName": "Fireworks", + "displayName": "Friendli", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "LGAI-EXAONE/EXAONE-4.0-32B", + "LGAI-EXAONE/EXAONE-4.0.1-32B", + "LGAI-EXAONE/K-EXAONE-236B-A23B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Friendli", + "owners": ["{}"], + "slug": "friendli", + "statusPageUrl": "https://status.friendli.ai/" }, - "provider_model_id": "accounts/fireworks/models/gpt-oss-20b", - "provider_name": "Fireworks", + "provider_model_id": "Qwen/Qwen3-30B-A3B", + "provider_name": "Friendli", "provider_region": null, - "provider_slug": "fireworks", + "provider_slug": "friendli", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", + "seed", "top_k", + "min_p", "repetition_penalty", - "logit_bias", - "response_format", - "structured_outputs", "tools", "tool_choice" ], @@ -63123,68 +64020,70 @@ "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-30B-A3B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 30B A3B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-30b-a3b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 30B A3B", + "slug": "qwen/qwen3-30b-a3b", + "updated_at": "2026-01-08T19:57:57.475571+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 128000, - "created_at": "2025-03-24T18:10:38.542849+00:00", - "default_parameters": {}, + "author": "z-ai", + "context_length": 202752, + "created_at": "2025-09-30T12:32:56.306946+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "FriendliAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 202752, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "aac76f56-eedb-405f-a07c-5bc66b877c1e", + "id": "4929f6aa-3dc2-47ae-80fd-626ad6fd8199", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -63193,278 +64092,97 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 202752, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 32768, - "created_at": "2025-03-24T18:10:38.542849+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", - "features": {}, - "group": "Qwen", - "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-32B-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 32B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-32b-instruct", - "reasoning_config": null, - "router": null, - "short_name": "Qwen2.5 VL 32B Instruct", - "slug": "qwen/qwen2.5-vl-32b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "qwen/qwen2.5-vl-32b-instruct", - "model_variant_slug": "qwen/qwen2.5-vl-32b-instruct", - "moderation_required": false, - "name": "Fireworks | qwen/qwen2.5-vl-32b-instruct", - "pricing": { - "completion": "0.0000009000000000000001", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000009000000000000001", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "Fireworks", - "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", - "training": false - }, - "displayName": "Fireworks", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", - "icon": { - "url": "/images/icons/Fireworks.png" + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-09-30T12:32:56.306946+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": null }, - "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" - ], - "isAbortable": true, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" - }, - "provider_model_id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct", - "provider_name": "Fireworks", - "provider_region": null, - "provider_slug": "fireworks", - "quantization": "unknown", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "response_format", - "structured_outputs" - ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, - "variable_pricings": [], - "variant": "standard" - }, - "features": {}, - "group": "Qwen", - "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-32B-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 32B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-32b-instruct", - "reasoning_config": null, - "router": null, - "short_name": "Qwen2.5 VL 32B Instruct", - "slug": "qwen/qwen2.5-vl-32b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:29:17.25671+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], - "default_system": null, - "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", - "endpoint": { - "adapter_name": "FireworksAdapter", - "can_abort": true, - "context_length": 131072, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", - "training": false - }, - "features": { - "supported_parameters": {}, - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": true, - "id": "d88007ff-3317-4c16-8257-49bfab63abd9", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": null, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:29:17.25671+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-04-28", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B", - "slug": "qwen/qwen3-235b-a22b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-04-28", - "model_variant_slug": "qwen/qwen3-235b-a22b", + "model_variant_permaslug": "z-ai/glm-4.6", + "model_variant_slug": "z-ai/glm-4.6", "moderation_required": false, - "name": "Fireworks | qwen/qwen3-235b-a22b-04-28", + "name": "Friendli | z-ai/glm-4.6", "pricing": { - "completion": "0.00000088", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000022", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Friendli", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "FriendliAdapter", + "baseUrl": "https://api.friendli.ai/serverless/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://friendli.ai/privacypolicy", + "retainsPrompts": true, + "termsOfServiceURL": "https://friendli.ai/terms-of-service", "training": false }, - "displayName": "Fireworks", + "displayName": "Friendli", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "LGAI-EXAONE/EXAONE-4.0-32B", + "LGAI-EXAONE/EXAONE-4.0.1-32B", + "LGAI-EXAONE/K-EXAONE-236B-A23B" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Friendli", + "owners": ["{}"], + "slug": "friendli", + "statusPageUrl": "https://status.friendli.ai/" }, - "provider_model_id": "accounts/fireworks/models/qwen3-235b-a22b", - "provider_name": "Fireworks", + "provider_model_id": "zai-org/GLM-4.6", + "provider_name": "Friendli", "provider_region": null, - "provider_slug": "fireworks", + "provider_slug": "friendli", "quantization": "unknown", "supported_parameters": [ "reasoning", @@ -63475,11 +64193,10 @@ "stop", "frequency_penalty", "presence_penalty", + "seed", "top_k", + "min_p", "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", "response_format", "structured_outputs", "tools", @@ -63492,56 +64209,73 @@ "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-04-28", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B", - "slug": "qwen/qwen3-235b-a22b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null - }, + } + ], + "name": "Friendli", + "slug": "friendli" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "datacenters": ["US"], + "displayName": "GMICloud", + "headquarters": "US", + "icon": { + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" + }, + "models": [ { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-03-24T13:59:15.252028+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "GMICloudAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -63551,194 +64285,173 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "1ad1ddae-5e60-4396-bfea-a2ad2fc6ed33", + "id": "42383b7f-b355-4879-b615-191788e08606", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 110, + "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-03-24T13:59:15.252028+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "features": {}, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", + "model_variant_slug": "deepseek/deepseek-chat-v3-0324", "moderation_required": false, - "name": "Fireworks | qwen/qwen3-235b-a22b-07-25", + "name": "GMICloud | deepseek/deepseek-chat-v3-0324", "pricing": { "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000022", - "request": "0", - "web_search": "0" + "prompt": "0.00000028" }, - "provider_display_name": "Fireworks", + "provider_display_name": "GMICloud", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "GMICloudAdapter", + "baseUrl": "https://api.gmi-serving.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", "training": false }, - "displayName": "Fireworks", + "displayName": "GMICloud", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "meta-llama/Llama-3.3-70B-Instruct", + "meta-llama/Llama-3.1-8B-Instruct", + "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", + "deepseek-ai/DeepSeek-V3.1-Terminus", + "allenai/olmOCR-2-7B-1025-FP8", + "tencent/HunyuanOCR", + "google/gemini-3-flash-preview", + "google/gemini-3-pro-preview", + "openai/gpt-4o", + "openai/gpt-5", + "openai/gpt-5.1", + "openai/gpt-5.1-chat", + "openai/gpt-5.2-chat", + "openai/gpt-5.2", + "openai/gpt-4o-mini", + "anthropic/claude-opus-4.1", + "anthropic/claude-3.7-sonnet", + "anthropic/claude-sonnet-4", + "anthropic/claude-opus-4", + "anthropic/claude-opus-4.5", + "anthropic/claude-sonnet-4.5", + "anthropic/claude-haiku-4.5", + "Wan-AI/Wan2.2-I2V-A14B", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "GMICloud", + "owners": ["{}"], + "slug": "gmicloud", + "statusPageUrl": null }, - "provider_model_id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507", - "provider_name": "Fireworks", + "provider_model_id": "deepseek-ai/DeepSeek-V3-0324", + "provider_name": "GMICloud", "provider_region": null, - "provider_slug": "fireworks/fp8", + "provider_slug": "gmicloud/fp8", "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "tools", - "tool_choice" + "seed", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T21:43:52.421936+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue. It supports seamless switching between \"thinking\" mode for math, coding, and logical inference, and \"non-thinking\" mode for general conversation. The model is fine-tuned for instruction-following, agent integration, creative writing, and multilingual use across 100+ languages and dialects. It natively supports a 32K token context window and can extend to 131K tokens with YaRN scaling.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "GMICloudAdapter", "can_abort": true, - "context_length": 40960, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -63748,7 +64461,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cd308c60-76cb-4270-bcd2-4a87684a3b82", + "id": "08b62c70-98d8-42d4-8c70-01698480d7e1", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -63761,182 +64474,174 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:43:52.421936+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue. It supports seamless switching between \"thinking\" mode for math, coding, and logical inference, and \"non-thinking\" mode for general conversation. The model is fine-tuned for instruction-following, agent integration, creative writing, and multilingual use across 100+ languages and dialects. It natively supports a 32K token context window and can extend to 131K tokens with YaRN scaling.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-8B", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 8B", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-8b-04-28", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Qwen3 8B", - "slug": "qwen/qwen3-8b", - "updated_at": "2026-01-08T19:54:39.933523+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-8b-04-28", - "model_variant_slug": "qwen/qwen3-8b", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "Fireworks | qwen/qwen3-8b-04-28", + "name": "GMICloud | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.00000020000000000000002", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000020000000000000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, - "provider_display_name": "Fireworks", + "provider_display_name": "GMICloud", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "GMICloudAdapter", + "baseUrl": "https://api.gmi-serving.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", "training": false }, - "displayName": "Fireworks", + "displayName": "GMICloud", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "meta-llama/Llama-3.3-70B-Instruct", + "meta-llama/Llama-3.1-8B-Instruct", + "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", + "deepseek-ai/DeepSeek-V3.1-Terminus", + "allenai/olmOCR-2-7B-1025-FP8", + "tencent/HunyuanOCR", + "google/gemini-3-flash-preview", + "google/gemini-3-pro-preview", + "openai/gpt-4o", + "openai/gpt-5", + "openai/gpt-5.1", + "openai/gpt-5.1-chat", + "openai/gpt-5.2-chat", + "openai/gpt-5.2", + "openai/gpt-4o-mini", + "anthropic/claude-opus-4.1", + "anthropic/claude-3.7-sonnet", + "anthropic/claude-sonnet-4", + "anthropic/claude-opus-4", + "anthropic/claude-opus-4.5", + "anthropic/claude-sonnet-4.5", + "anthropic/claude-haiku-4.5", + "Wan-AI/Wan2.2-I2V-A14B", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "GMICloud", + "owners": ["{}"], + "slug": "gmicloud", + "statusPageUrl": null }, - "provider_model_id": "accounts/fireworks/models/qwen3-8b", - "provider_name": "Fireworks", + "provider_model_id": "moonshotai/Kimi-K2.5", + "provider_name": "GMICloud", "provider_region": null, - "provider_slug": "fireworks", - "quantization": "unknown", + "provider_slug": "gmicloud/int4", + "quantization": "int4", "supported_parameters": [ "reasoning", "include_reasoning", "max_tokens", "temperature", "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "response_format", - "structured_outputs", - "tool_choice", - "tools" + "seed" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-8B", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 8B", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-8b-04-28", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Qwen3 8B", - "slug": "qwen/qwen3-8b", - "updated_at": "2026-01-08T19:54:39.933523+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-23T00:29:06+00:00", + "created_at": "2025-09-11T17:36:53.6379+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "GMICloudAdapter", "can_abort": true, "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", "training": false }, "features": { @@ -63949,7 +64654,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "4117a5d2-961b-4065-99dd-554484dc6443", + "id": "9d33dd86-b592-406b-965f-f91977020855", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -63963,12 +64668,12 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 1048576, - "created_at": "2025-07-23T00:29:06+00:00", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "features": { "reasoning_config": { "end_token": null, @@ -63978,103 +64683,101 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "model_variant_slug": "qwen/qwen3-coder", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", "moderation_required": false, - "name": "Fireworks | qwen/qwen3-coder-480b-a35b-07-25", + "name": "GMICloud | qwen/qwen3-next-80b-a3b-instruct-2509", "pricing": { - "completion": "0.0000018000000000000001", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000045000000000000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, - "provider_display_name": "Fireworks", + "provider_display_name": "GMICloud", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "GMICloudAdapter", + "baseUrl": "https://api.gmi-serving.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", "training": false }, - "displayName": "Fireworks", + "displayName": "GMICloud", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "meta-llama/Llama-3.3-70B-Instruct", + "meta-llama/Llama-3.1-8B-Instruct", + "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", + "deepseek-ai/DeepSeek-V3.1-Terminus", + "allenai/olmOCR-2-7B-1025-FP8", + "tencent/HunyuanOCR", + "google/gemini-3-flash-preview", + "google/gemini-3-pro-preview", + "openai/gpt-4o", + "openai/gpt-5", + "openai/gpt-5.1", + "openai/gpt-5.1-chat", + "openai/gpt-5.2-chat", + "openai/gpt-5.2", + "openai/gpt-4o-mini", + "anthropic/claude-opus-4.1", + "anthropic/claude-3.7-sonnet", + "anthropic/claude-sonnet-4", + "anthropic/claude-opus-4", + "anthropic/claude-opus-4.5", + "anthropic/claude-sonnet-4.5", + "anthropic/claude-haiku-4.5", + "Wan-AI/Wan2.2-I2V-A14B", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "GMICloud", + "owners": ["{}"], + "slug": "gmicloud", + "statusPageUrl": null }, - "provider_model_id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct", - "provider_name": "Fireworks", + "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "provider_name": "GMICloud", "provider_region": null, - "provider_slug": "fireworks", - "quantization": "unknown", + "provider_slug": "gmicloud/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "response_format", - "structured_outputs", + "seed", "tools", "tool_choice" ], @@ -64093,23 +64796,23 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, @@ -64126,14 +64829,14 @@ "default_system": null, "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "endpoint": { - "adapter_name": "FireworksAdapter", + "adapter_name": "GMICloudAdapter", "can_abort": true, "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", "training": false }, "features": { @@ -64146,7 +64849,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "420bd9d7-e714-487a-828a-ac4d279bf18e", + "id": "b90705c3-9b23-4cc2-8366-59ba6666980d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -64202,82 +64905,82 @@ "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", "moderation_required": false, - "name": "Fireworks | qwen/qwen3-vl-235b-a22b-instruct", + "name": "GMICloud | qwen/qwen3-vl-235b-a22b-instruct", "pricing": { - "completion": "0.00000088", + "completion": "0.0000014", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000022", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, - "provider_display_name": "Fireworks", + "provider_display_name": "GMICloud", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "GMICloudAdapter", + "baseUrl": "https://api.gmi-serving.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", "training": false }, - "displayName": "Fireworks", + "displayName": "GMICloud", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + "meta-llama/Llama-3.3-70B-Instruct", + "meta-llama/Llama-3.1-8B-Instruct", + "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", + "deepseek-ai/DeepSeek-V3.1-Terminus", + "allenai/olmOCR-2-7B-1025-FP8", + "tencent/HunyuanOCR", + "google/gemini-3-flash-preview", + "google/gemini-3-pro-preview", + "openai/gpt-4o", + "openai/gpt-5", + "openai/gpt-5.1", + "openai/gpt-5.1-chat", + "openai/gpt-5.2-chat", + "openai/gpt-5.2", + "openai/gpt-4o-mini", + "anthropic/claude-opus-4.1", + "anthropic/claude-3.7-sonnet", + "anthropic/claude-sonnet-4", + "anthropic/claude-opus-4", + "anthropic/claude-opus-4.5", + "anthropic/claude-sonnet-4.5", + "anthropic/claude-haiku-4.5", + "Wan-AI/Wan2.2-I2V-A14B", + "anthropic/claude-opus-4.6" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "GMICloud", + "owners": ["{}"], + "slug": "gmicloud", + "statusPageUrl": null }, - "provider_model_id": "accounts/fireworks/models/qwen3-vl-235b-a22b-instruct", - "provider_name": "Fireworks", + "provider_model_id": "Qwen/Qwen3-VL-235B-A22B-Instruct-FP8", + "provider_name": "GMICloud", "provider_region": null, - "provider_slug": "fireworks", - "quantization": "unknown", + "provider_slug": "gmicloud/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "response_format", - "structured_outputs", + "seed", "tools", - "tool_choice" + "tool_choice", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, @@ -64313,28 +65016,41 @@ "slug": "qwen/qwen3-vl-235b-a22b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, + } + ], + "name": "GMICloud", + "slug": "gmicloud" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Google Vertex", + "headquarters": "US", + "icon": { + "url": "/images/icons/GoogleVertex.svg" + }, + "models": [ { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-06T23:47:56.430294+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, + "author": "anthropic", + "context_length": 200000, + "created_at": "2024-03-13T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal", "endpoint": { - "adapter_name": "FireworksAdapter", - "can_abort": true, - "context_length": 262144, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 200000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { @@ -64346,138 +65062,136 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "dbb294c1-20cd-4d67-b225-2ff03a050cc8", + "has_completions": false, + "id": "c8ee28fa-00f7-4a61-adc8-f2272f7e0154", "is_byok": false, - "is_deranked": false, + "is_deranked": true, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-06T23:47:56.430294+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, + "author": "anthropic", + "context_length": 200000, + "created_at": "2024-03-13T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal", + "features": {}, + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", + "name": "Anthropic: Claude 3 Haiku", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "anthropic/claude-3-haiku", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 VL 30B A3B Instruct", - "slug": "qwen/qwen3-vl-30b-a3b-instruct", + "short_name": "Claude 3 Haiku", + "slug": "anthropic/claude-3-haiku", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-instruct", - "model_variant_slug": "qwen/qwen3-vl-30b-a3b-instruct", + "model_variant_permaslug": "anthropic/claude-3-haiku", + "model_variant_slug": "anthropic/claude-3-haiku", "moderation_required": false, - "name": "Fireworks | qwen/qwen3-vl-30b-a3b-instruct", + "name": "Google | anthropic/claude-3-haiku", "pricing": { - "completion": "0.0000006", + "completion": "0.00000125", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000003", + "prompt": "0.00000025" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Fireworks", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Google", + "owners": ["{}"], + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct", - "provider_name": "Fireworks", + "provider_model_id": "claude-3-haiku@20240307", + "provider_name": "Google", "provider_region": null, - "provider_slug": "fireworks", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", "top_p", - "stop", - "frequency_penalty", - "presence_penalty", "top_k", - "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "response_format", - "structured_outputs", + "stop", "tools", "tool_choice" ], @@ -64487,64 +65201,50 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", + "name": "Anthropic: Claude 3 Haiku", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "anthropic/claude-3-haiku", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 VL 30B A3B Instruct", - "slug": "qwen/qwen3-vl-30b-a3b-instruct", + "short_name": "Claude 3 Haiku", + "slug": "anthropic/claude-3-haiku", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 202752, - "created_at": "2025-09-30T12:32:56.306946+00:00", + "author": "anthropic", + "context_length": 200000, + "created_at": "2024-11-04T00:00:00+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, + "temperature": null, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.\n\nThis makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.\n\nThis model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).", "endpoint": { - "adapter_name": "FireworksAdapter", - "can_abort": true, - "context_length": 202752, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 200000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -64553,145 +65253,157 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "d64561f9-4884-4c7d-875b-69ce3b5c8b98", + "has_completions": false, + "id": "8636daa9-bd75-466e-8440-1aa27df5942a", "is_byok": false, - "is_deranked": false, + "is_deranked": true, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", + "author": "anthropic", "context_length": 200000, - "created_at": "2025-09-30T12:32:56.306946+00:00", + "created_at": "2024-11-04T00:00:00+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, + "temperature": null, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.\n\nThis makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.\n\nThis model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Claude", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, - "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", + "name": "Anthropic: Claude 3.5 Haiku", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "anthropic/claude-3-5-haiku", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "Claude 3.5 Haiku", + "slug": "anthropic/claude-3.5-haiku", + "updated_at": "2025-12-05T21:54:21.40359+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6", - "model_variant_slug": "z-ai/glm-4.6", + "model_variant_permaslug": "anthropic/claude-3-5-haiku", + "model_variant_slug": "anthropic/claude-3.5-haiku", "moderation_required": false, - "name": "Fireworks | z-ai/glm-4.6", + "name": "Google | anthropic/claude-3-5-haiku", "pricing": { - "completion": "0.0000021899999999999998", + "completion": "0.000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000055", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000008", + "prompt": "0.0000008", + "web_search": "0.01" }, - "provider_display_name": "Fireworks", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "FireworksAdapter", - "baseUrl": "https://api.fireworks.ai/inference/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://fireworks.ai/privacy-policy", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, "retainsPrompts": false, - "termsOfServiceURL": "https://fireworks.ai/terms-of-service", + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Fireworks", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/Fireworks.png" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "accounts/fireworks/models/qwen2-vl-72b-instruct", - "accounts/fireworks/models/llama4-scout-instruct-basic", - "accounts/scale-ai/models/arctic-text2sql-r1-7b-public", - "accounts/fireworks/models/qwen3-embedding-8b", - "accounts/fireworks/models/glm-4p5-air", - "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507", - "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct", - "accounts/lukablaskovic2000-738cce/models/bio-mistral-7b", - "accounts/perplexity/models/r1-1776", - "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new", - "accounts/fireworks/models/qwen3-reranker-8b", - "accounts/fireworks/models/deepseek-v3p1-terminus", - "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b", - "accounts/fireworks/models/deepseek-r1-basic" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Fireworks", - "owners": ["org_34k70EDz4tN23XFsU2l3tSqm4wF"], - "slug": "fireworks", - "statusPageUrl": "https://status.fireworks.ai/" + "name": "Google", + "owners": ["{}"], + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "accounts/fireworks/models/glm-4p6", - "provider_name": "Fireworks", + "provider_model_id": "claude-3-5-haiku@20241022", + "provider_name": "Google", "provider_region": null, - "provider_slug": "fireworks", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "stop", - "frequency_penalty", - "presence_penalty", "top_k", - "repetition_penalty", - "logit_bias", - "logprobs", - "top_logprobs", + "stop", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -64699,66 +65411,55 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Claude", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, - "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", + "name": "Anthropic: Claude 3.5 Haiku", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "anthropic/claude-3-5-haiku", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "Claude 3.5 Haiku", + "slug": "anthropic/claude-3.5-haiku", + "updated_at": "2025-12-05T21:54:21.40359+00:00", "warning_message": null - } - ], - "name": "Fireworks", - "slug": "fireworks" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "displayName": "Friendli", - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" - }, - "models": [ + }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-02-24T18:35:10.00008+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", "endpoint": { - "adapter_name": "FriendliAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 200000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { @@ -64771,8 +65472,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "986e98d3-8e2b-4943-b01b-b5c17f2ade41", + "has_completions": false, + "id": "1c9b8776-e266-4efb-b5ba-19a6753e7736", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -64781,149 +65482,210 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 64000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-02-24T18:35:10.00008+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "input_modalities": ["text", "image", "file"], + "instruct_type": null, + "model_version_group_id": "30636d20-cda3-4a59-aa0c-1a5b6efba072", + "name": "Anthropic: Claude 3.7 Sonnet", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", - "reasoning_config": null, + "permaslug": "anthropic/claude-3-7-sonnet-20250219", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Claude 3.7 Sonnet", + "slug": "anthropic/claude-3.7-sonnet", + "updated_at": "2025-12-05T21:54:07.586262+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_permaslug": "anthropic/claude-3-7-sonnet-20250219", + "model_variant_slug": "anthropic/claude-3.7-sonnet", "moderation_required": false, - "name": "Friendli | meta-llama/llama-3.1-8b-instruct", + "name": "Google | anthropic/claude-3-7-sonnet-20250219", "pricing": { - "completion": "0.0000001", + "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000003", + "prompt": "0.000003", + "web_search": "0.01" }, - "provider_display_name": "Friendli", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "FriendliAdapter", - "baseUrl": "https://api.friendli.ai/serverless/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Friendli", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0.1-32B" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Friendli", + "name": "Google", "owners": ["{}"], - "slug": "friendli", - "statusPageUrl": "https://status.friendli.ai/" + "slug": "google-vertex/us", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "meta-llama-3.1-8b-instruct", - "provider_name": "Friendli", + "provider_model_id": "claude-3-7-sonnet@20250219", + "provider_name": "Google", "provider_region": null, - "provider_slug": "friendli", + "provider_slug": "google-vertex/us", "quantization": "unknown", "supported_parameters": [ "max_tokens", - "temperature", "top_p", + "temperature", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "min_p", - "repetition_penalty", - "response_format", - "structured_outputs", + "reasoning", + "include_reasoning", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "input_modalities": ["text", "image", "file"], + "instruct_type": null, + "model_version_group_id": "30636d20-cda3-4a59-aa0c-1a5b6efba072", + "name": "Anthropic: Claude 3.7 Sonnet", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", - "reasoning_config": null, + "permaslug": "anthropic/claude-3-7-sonnet-20250219", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Claude 3.7 Sonnet", + "slug": "anthropic/claude-3.7-sonnet", + "updated_at": "2025-12-05T21:54:07.586262+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-10-15T17:00:38+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line; enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", "endpoint": { - "adapter_name": "FriendliAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 200000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { "supported_parameters": {}, + "supports_input_audio": false, + "supports_native_web_search": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -64932,8 +65694,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "b5f4db53-f8da-406b-83bc-2929f11eef13", + "has_completions": false, + "id": "8a5e69a1-ea0d-4af6-899e-8b7e1c93a7a9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -64942,149 +65704,209 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 64000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-10-15T17:00:38+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", - "features": {}, - "group": "Llama3", + "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line; enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "input_modalities": ["image", "text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Anthropic: Claude Haiku 4.5", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "anthropic/claude-4.5-haiku-20251001", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Claude Haiku 4.5", + "slug": "anthropic/claude-haiku-4.5", + "updated_at": "2025-12-05T21:53:18.541396+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "anthropic/claude-4.5-haiku-20251001", + "model_variant_slug": "anthropic/claude-haiku-4.5", "moderation_required": false, - "name": "Friendli | meta-llama/llama-3.3-70b-instruct", + "name": "Google | anthropic/claude-4.5-haiku-20251001", "pricing": { - "completion": "0.0000006", + "completion": "0.000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001", + "prompt": "0.000001", + "web_search": "0.01" }, - "provider_display_name": "Friendli", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "FriendliAdapter", - "baseUrl": "https://api.friendli.ai/serverless/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Friendli", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0.1-32B" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Friendli", + "name": "Google", "owners": ["{}"], - "slug": "friendli", - "statusPageUrl": "https://status.friendli.ai/" + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "meta-llama-3.3-70b-instruct", - "provider_name": "Friendli", - "provider_region": null, - "provider_slug": "friendli", + "provider_model_id": "claude-haiku-4-5@20251001", + "provider_name": "Google", + "provider_region": "global", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ "max_tokens", - "temperature", "top_p", + "temperature", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "min_p", - "repetition_penalty", - "response_format", - "structured_outputs", + "reasoning", + "include_reasoning", "tools", - "tool_choice" + "tool_choice", + "top_k" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "input_modalities": ["image", "text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Anthropic: Claude Haiku 4.5", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "anthropic/claude-4.5-haiku-20251001", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Claude Haiku 4.5", + "slug": "anthropic/claude-haiku-4.5", + "updated_at": "2025-12-05T21:53:18.541396+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-05-22T16:27:25.029961+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", "endpoint": { - "adapter_name": "FriendliAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 200000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supports_multipart": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -65093,161 +65915,217 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "7e96e289-9cc3-45ad-8635-7a4aec720769", + "has_completions": false, + "id": "377a8014-1242-4923-ae2c-20946f6c18d8", "is_byok": false, - "is_deranked": false, + "is_deranked": true, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 32000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-05-22T16:27:25.029961+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", - "features": {}, - "group": "Llama4", - "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["image", "text", "file"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Anthropic: Claude Opus 4", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "reasoning_config": null, + "permaslug": "anthropic/claude-4-opus-20250522", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Claude Opus 4", + "slug": "anthropic/claude-opus-4", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "model_variant_slug": "meta-llama/llama-4-maverick", + "model_variant_permaslug": "anthropic/claude-4-opus-20250522", + "model_variant_slug": "anthropic/claude-opus-4", "moderation_required": false, - "name": "Friendli | meta-llama/llama-4-maverick-17b-128e-instruct", + "name": "Google | anthropic/claude-4-opus-20250522", "pricing": { - "completion": "0.0000006", + "completion": "0.000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000015", + "prompt": "0.000015", + "web_search": "0.01" }, - "provider_display_name": "Friendli", + "provider_display_name": "Google Vertex (Europe)", "provider_info": { - "adapterName": "FriendliAdapter", - "baseUrl": "https://api.friendli.ai/serverless/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Friendli", + "displayName": "Google Vertex (Europe)", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0.1-32B" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Friendli", + "name": "Google", "owners": ["{}"], - "slug": "friendli", - "statusPageUrl": "https://status.friendli.ai/" + "slug": "google-vertex/europe", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", - "provider_name": "Friendli", - "provider_region": null, - "provider_slug": "friendli", + "provider_model_id": "claude-opus-4@20250514", + "provider_name": "Google", + "provider_region": "europe-west4", + "provider_slug": "google-vertex/europe", "quantization": "unknown", "supported_parameters": [ "max_tokens", - "temperature", "top_p", + "temperature", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "min_p", - "repetition_penalty", - "response_format", - "structured_outputs" + "reasoning", + "include_reasoning", + "tools", + "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama4", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "name": "Anthropic: Claude Opus 4", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "reasoning_config": null, + "permaslug": "anthropic/claude-4-opus-20250522", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Claude Opus 4", + "slug": "anthropic/claude-opus-4", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2025-04-05T19:31:59.735804+00:00", - "default_parameters": {}, + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-08-05T16:33:11.634562+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", "endpoint": { - "adapter_name": "FriendliAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 200000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_multipart": false, + "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -65256,156 +66134,217 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "0ee7adf4-f9e5-47e7-b4c7-be696689a546", + "has_completions": false, + "id": "fd53ee1e-86ae-40b4-ba95-79bdce79051c", "is_byok": false, - "is_deranked": false, + "is_deranked": true, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 32000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 10000000, - "created_at": "2025-04-05T19:31:59.735804+00:00", - "default_parameters": {}, + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-08-05T16:33:11.634562+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", - "features": {}, - "group": "Llama4", + "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "name": "Anthropic: Claude Opus 4.1", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "reasoning_config": null, + "permaslug": "anthropic/claude-4.1-opus-20250805", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Claude Opus 4.1", + "slug": "anthropic/claude-opus-4.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "model_variant_slug": "meta-llama/llama-4-scout", + "model_variant_permaslug": "anthropic/claude-4.1-opus-20250805", + "model_variant_slug": "anthropic/claude-opus-4.1", "moderation_required": false, - "name": "Friendli | meta-llama/llama-4-scout-17b-16e-instruct", + "name": "Google | anthropic/claude-4.1-opus-20250805", "pricing": { - "completion": "0.0000006", + "completion": "0.000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000015", + "prompt": "0.000015", + "web_search": "0.01" }, - "provider_display_name": "Friendli", + "provider_display_name": "Google Vertex (Global)", "provider_info": { - "adapterName": "FriendliAdapter", - "baseUrl": "https://api.friendli.ai/serverless/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Friendli", + "displayName": "Google Vertex (Global)", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0.1-32B" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Friendli", + "name": "Google", "owners": ["{}"], - "slug": "friendli", - "statusPageUrl": "https://status.friendli.ai/" + "slug": "google-vertex/global", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "provider_name": "Friendli", - "provider_region": null, - "provider_slug": "friendli", + "provider_model_id": "claude-opus-4-1@20250805", + "provider_name": "Google", + "provider_region": "global", + "provider_slug": "google-vertex/global", "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", - "temperature", "top_p", + "temperature", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "min_p", - "repetition_penalty" + "reasoning", + "include_reasoning", + "tools", + "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama4", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Claude", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "name": "Anthropic: Claude Opus 4.1", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "reasoning_config": null, + "permaslug": "anthropic/claude-4.1-opus-20250805", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Claude Opus 4.1", + "slug": "anthropic/claude-opus-4.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-11-24T18:56:20+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection. The model is designed to operate efficiently across varied effort levels, enabling developers to trade off speed, depth, and token usage depending on task requirements. It comes with a new parameter to control token efficiency, which can be accessed using the OpenRouter Verbosity parameter with low, medium, or high.\n\nOpus 4.5 supports advanced tool use, extended context management, and coordinated multi-agent setups, making it well-suited for autonomous research, debugging, multi-step planning, and spreadsheet/browser manipulation. It delivers substantial gains in structured reasoning, execution reliability, and alignment compared to prior Opus generations, while reducing token overhead and improving performance on long-running tasks.", "endpoint": { - "adapter_name": "FriendliAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 200000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { + "supports_input_audio": false, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -65414,8 +66353,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "02b90f2a-e684-46b3-808b-eb88e1348e29", + "has_completions": false, + "id": "f28aad62-67fa-4156-a139-0b80b28bc08f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -65424,169 +66363,204 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 64000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "author": "anthropic", + "context_length": 200000, + "created_at": "2025-11-24T18:56:20+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection. The model is designed to operate efficiently across varied effort levels, enabling developers to trade off speed, depth, and token usage depending on task requirements. It comes with a new parameter to control token efficiency, which can be accessed using the OpenRouter Verbosity parameter with low, medium, or high.\n\nOpus 4.5 supports advanced tool use, extended context management, and coordinated multi-agent setups, making it well-suited for autonomous research, debugging, multi-step planning, and spreadsheet/browser manipulation. It delivers substantial gains in structured reasoning, execution reliability, and alignment compared to prior Opus generations, while reducing token overhead and improving performance on long-running tasks.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Anthropic: Claude Opus 4.5", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "anthropic/claude-4.5-opus-20251124", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Claude Opus 4.5", + "slug": "anthropic/claude-opus-4.5", + "updated_at": "2026-01-15T17:57:03.680811+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "anthropic/claude-4.5-opus-20251124", + "model_variant_slug": "anthropic/claude-opus-4.5", "moderation_required": false, - "name": "Friendli | qwen/qwen3-235b-a22b-07-25", + "name": "Google | anthropic/claude-4.5-opus-20251124", "pricing": { - "completion": "0.0000008", + "completion": "0.000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000005", + "prompt": "0.000005", + "web_search": "0.01" }, - "provider_display_name": "Friendli", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "FriendliAdapter", - "baseUrl": "https://api.friendli.ai/serverless/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Friendli", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0.1-32B" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Friendli", + "name": "Google", "owners": ["{}"], - "slug": "friendli", - "statusPageUrl": "https://status.friendli.ai/" + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "provider_name": "Friendli", - "provider_region": null, - "provider_slug": "friendli", + "provider_model_id": "claude-opus-4-5@20251101", + "provider_name": "Google", + "provider_region": "global", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", - "top_p", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "min_p", - "repetition_penalty", - "response_format", - "structured_outputs", + "reasoning", + "include_reasoning", + "tool_choice", "tools", - "tool_choice" + "verbosity" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Anthropic: Claude Opus 4.5", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "anthropic/claude-4.5-opus-20251124", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Claude Opus 4.5", + "slug": "anthropic/claude-opus-4.5", + "updated_at": "2026-01-15T17:57:03.680811+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-07-25T13:19:17.179049+00:00", + "author": "anthropic", + "context_length": 1000000, + "created_at": "2026-02-04T15:30:50.029498+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective for large codebases, complex refactors, and multi-step debugging that unfolds over time. The model shows deeper contextual understanding, stronger problem decomposition, and greater reliability on hard engineering tasks than prior generations.\n\nBeyond coding, Opus 4.6 excels at sustained knowledge work. It produces near-production-ready documents, plans, and analyses in a single pass, and maintains coherence across very long outputs and extended sessions. This makes it a strong default for tasks that require persistence, judgment, and follow-through, such as technical design, migration planning, and end-to-end project execution.\n\nFor users upgrading from earlier Opus versions, see our [official migration guide here](https://openrouter.ai/docs/guides/guides/model-migrations/claude-4-6-opus)\n", "endpoint": { - "adapter_name": "FriendliAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 1000000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { @@ -65598,8 +66572,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "f49b0831-ff8f-458a-bcf5-7401c3a50ac6", + "has_completions": false, + "id": "8dba71f8-55ed-4f21-bdc3-0d46b9e3f247", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -65608,184 +66582,214 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-25T13:19:17.179049+00:00", + "author": "anthropic", + "context_length": 1000000, + "created_at": "2026-02-04T15:30:50.029498+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective for large codebases, complex refactors, and multi-step debugging that unfolds over time. The model shows deeper contextual understanding, stronger problem decomposition, and greater reliability on hard engineering tasks than prior generations.\n\nBeyond coding, Opus 4.6 excels at sustained knowledge work. It produces near-production-ready documents, plans, and analyses in a single pass, and maintains coherence across very long outputs and extended sessions. This makes it a strong default for tasks that require persistence, judgment, and follow-through, such as technical design, migration planning, and end-to-end project execution.\n\nFor users upgrading from earlier Opus versions, see our [official migration guide here](https://openrouter.ai/docs/guides/guides/model-migrations/claude-4-6-opus)\n", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "Qwen3", + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "Anthropic: Claude Opus 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "permaslug": "anthropic/claude-4.6-opus-20260205", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "Claude Opus 4.6", + "slug": "anthropic/claude-opus-4.6", + "updated_at": "2026-02-05T17:50:47.549608+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", - "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_permaslug": "anthropic/claude-4.6-opus-20260205", + "model_variant_slug": "anthropic/claude-opus-4.6", "moderation_required": false, - "name": "Friendli | qwen/qwen3-235b-a22b-thinking-2507", + "name": "Google | anthropic/claude-4.6-opus-20260205", "pricing": { - "completion": "0.0000024", + "completion": "0.000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000005", + "prompt": "0.000005", + "web_search": "0.01" }, - "provider_display_name": "Friendli", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "FriendliAdapter", - "baseUrl": "https://api.friendli.ai/serverless/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Friendli", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0.1-32B" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Friendli", + "name": "Google", "owners": ["{}"], - "slug": "friendli", - "statusPageUrl": "https://status.friendli.ai/" + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", - "provider_name": "Friendli", - "provider_region": null, - "provider_slug": "friendli", + "provider_model_id": "claude-opus-4-6", + "provider_name": "Google", + "provider_region": "global", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", - "temperature", "top_p", + "temperature", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "min_p", - "repetition_penalty", - "response_format", - "structured_outputs", + "reasoning", + "include_reasoning", + "tool_choice", "tools", - "tool_choice" + "structured_outputs", + "response_format", + "verbosity" ], "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "completions": "0.0000375", + "prompt": "0.00001", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "Qwen3", + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "Anthropic: Claude Opus 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "permaslug": "anthropic/claude-4.6-opus-20260205", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "Claude Opus 4.6", + "slug": "anthropic/claude-opus-4.6", + "updated_at": "2026-02-05T17:50:47.549608+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "author": "anthropic", + "context_length": 1000000, + "created_at": "2025-05-22T16:12:51.381897+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", "endpoint": { - "adapter_name": "FriendliAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 1000000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -65794,8 +66798,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "6a005fee-4205-438d-a938-11f35a039fd3", + "has_completions": false, + "id": "5e3f0568-c089-416a-ab7b-6c432999b571", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -65804,180 +66808,218 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 64000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "author": "anthropic", + "context_length": 1000000, + "created_at": "2025-05-22T16:12:51.381897+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["image", "text", "file"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "Anthropic: Claude Sonnet 4", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "anthropic/claude-4-sonnet-20250522", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "Claude Sonnet 4", + "slug": "anthropic/claude-sonnet-4", + "updated_at": "2025-12-05T21:53:41.372783+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", - "model_variant_slug": "qwen/qwen3-30b-a3b", + "model_variant_permaslug": "anthropic/claude-4-sonnet-20250522", + "model_variant_slug": "anthropic/claude-sonnet-4", "moderation_required": false, - "name": "Friendli | qwen/qwen3-30b-a3b-04-28", + "name": "Google | anthropic/claude-4-sonnet-20250522", "pricing": { - "completion": "0.0000006", + "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000003", + "prompt": "0.000003", + "web_search": "0.01" }, - "provider_display_name": "Friendli", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "FriendliAdapter", - "baseUrl": "https://api.friendli.ai/serverless/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Friendli", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0.1-32B" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Friendli", + "name": "Google", "owners": ["{}"], - "slug": "friendli", - "statusPageUrl": "https://status.friendli.ai/" + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "Qwen/Qwen3-30B-A3B", - "provider_name": "Friendli", + "provider_model_id": "claude-sonnet-4@20250514", + "provider_name": "Google", "provider_region": null, - "provider_slug": "friendli", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", - "temperature", "top_p", + "temperature", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "min_p", - "repetition_penalty", + "reasoning", + "include_reasoning", "tools", "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "completions": "0.0000225", + "input_cache_read": "0.0000006", + "input_cache_write": "0.0000075", + "prompt": "0.000006", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["image", "text", "file"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "Anthropic: Claude Sonnet 4", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "anthropic/claude-4-sonnet-20250522", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "Claude Sonnet 4", + "slug": "anthropic/claude-sonnet-4", + "updated_at": "2025-12-05T21:53:41.372783+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "anthropic", + "context_length": 1000000, + "created_at": "2025-09-29T16:01:16.552976+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 1 + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\n\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows. Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.", "endpoint": { - "adapter_name": "FriendliAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexAnthropicAdapter", + "can_abort": false, + "context_length": 1000000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -65986,8 +67028,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "25750390-ad32-494d-baa7-ad6d857e7f46", + "has_completions": false, + "id": "83c3ce47-c679-4cb1-9e21-a5defad78b61", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -65996,180 +67038,225 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 64000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "anthropic", + "context_length": 1000000, + "created_at": "2025-09-29T16:01:16.552976+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 1 + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\n\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows. Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.", "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen3", + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["text", "image", "file"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "Anthropic: Claude Sonnet 4.5", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "anthropic/claude-4.5-sonnet-20250929", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", + "short_name": "Claude Sonnet 4.5", + "slug": "anthropic/claude-sonnet-4.5", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-32b-04-28", - "model_variant_slug": "qwen/qwen3-32b", + "model_variant_permaslug": "anthropic/claude-4.5-sonnet-20250929", + "model_variant_slug": "anthropic/claude-sonnet-4.5", "moderation_required": false, - "name": "Friendli | qwen/qwen3-32b-04-28", + "name": "Google | anthropic/claude-4.5-sonnet-20250929", "pricing": { - "completion": "0.0000005", + "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000003", + "prompt": "0.000003", + "web_search": "0.01" }, - "provider_display_name": "Friendli", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "FriendliAdapter", - "baseUrl": "https://api.friendli.ai/serverless/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Friendli", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0.1-32B" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Friendli", + "name": "Google", "owners": ["{}"], - "slug": "friendli", - "statusPageUrl": "https://status.friendli.ai/" + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "Qwen/Qwen3-32B", - "provider_name": "Friendli", - "provider_region": null, - "provider_slug": "friendli", + "provider_model_id": "claude-sonnet-4-5@20250929", + "provider_name": "Google", + "provider_region": "us-east5", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", - "temperature", "top_p", + "temperature", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "min_p", - "repetition_penalty", + "reasoning", + "include_reasoning", "tools", - "tool_choice" + "tool_choice", + "top_k" ], "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "completions": "0.0000225", + "input_cache_read": "0.0000006", + "input_cache_write": "0.0000075", + "prompt": "0.000006", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen3", + "group": "Claude", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["text", "image", "file"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "Anthropic: Claude Sonnet 4.5", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "anthropic/claude-4.5-sonnet-20250929", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", + "short_name": "Claude Sonnet 4.5", + "slug": "anthropic/claude-sonnet-4.5", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-09-30T12:32:56.306946+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-08-21T12:33:48+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", "endpoint": { - "adapter_name": "FriendliAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "VertexOpenAIAdapter", + "can_abort": false, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, + "literal_none": true, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": true, - "id": "4929f6aa-3dc2-47ae-80fd-626ad6fd8199", + "has_completions": false, + "id": "26971f72-0b43-4255-a39d-98da9ca32119", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -66178,117 +67265,144 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 200000, - "created_at": "2025-09-30T12:32:56.306946+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-08-21T12:33:48+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Other", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-V3.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "DeepSeek: DeepSeek V3.1", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "deepseek/deepseek-chat-v3.1", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6", - "model_variant_slug": "z-ai/glm-4.6", + "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", + "model_variant_slug": "deepseek/deepseek-chat-v3.1", "moderation_required": false, - "name": "Friendli | z-ai/glm-4.6", + "name": "Google | deepseek/deepseek-chat-v3.1", "pricing": { - "completion": "0.0000022", + "completion": "0.0000017", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000003", + "prompt": "0.0000006" }, - "provider_display_name": "Friendli", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "FriendliAdapter", - "baseUrl": "https://api.friendli.ai/serverless/v1", + "adapterName": "VertexOpenAIAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://friendli.ai/privacypolicy", - "retainsPrompts": true, - "termsOfServiceURL": "https://friendli.ai/terms-of-service", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Friendli", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://friendli.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "LGAI-EXAONE/EXAONE-4.0-32B", - "LGAI-EXAONE/EXAONE-4.0.1-32B" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Friendli", + "name": "Google", "owners": ["{}"], - "slug": "friendli", - "statusPageUrl": "https://status.friendli.ai/" + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "zai-org/GLM-4.6", - "provider_name": "Friendli", - "provider_region": null, - "provider_slug": "friendli", + "provider_model_id": "deepseek-ai/deepseek-v3.1-maas", + "provider_name": "Google", + "provider_region": "us-west2", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", + "seed", "stop", "frequency_penalty", "presence_penalty", - "seed", "top_k", - "min_p", "repetition_penalty", - "response_format", - "structured_outputs", "tools", "tool_choice" ], @@ -66299,87 +67413,70 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Other", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-V3.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "DeepSeek: DeepSeek V3.1", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "deepseek/deepseek-chat-v3.1", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Friendli", - "slug": "friendli" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "datacenters": ["US"], - "displayName": "GMICloud", - "headquarters": "US", - "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" - }, - "models": [ + }, { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-12T05:12:39.645813+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-12-01T13:10:42.818885+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, - "default_stops": ["", "", ""], + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { - "adapter_name": "GMICloudAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "VertexOpenAIAdapter", + "can_abort": false, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": true, - "id": "50d819f6-5ef5-4630-b2bb-b700dfe47c49", + "has_completions": false, + "id": "a933135e-3dfc-49e2-b9ad-be0bcd2bcdf2", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -66388,181 +67485,216 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", + "author": "deepseek", "context_length": 131072, - "created_at": "2025-03-12T05:12:39.645813+00:00", + "created_at": "2025-12-01T13:10:42.818885+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, - "default_stops": ["", "", ""], + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "DeepSeek: DeepSeek V3.2", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "permaslug": "deepseek/deepseek-v3.2-20251201", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3-27b-it", - "model_variant_slug": "google/gemma-3-27b-it", + "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", + "model_variant_slug": "deepseek/deepseek-v3.2", "moderation_required": false, - "name": "GMICloud | google/gemma-3-27b-it", + "name": "Google | deepseek/deepseek-v3.2-20251201", "pricing": { - "completion": "0.00000016", + "completion": "0.00000168", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000056" }, - "provider_display_name": "GMICloud", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GMICloudAdapter", - "baseUrl": "https://api.gmi-serving.com/v1", + "adapterName": "VertexOpenAIAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "GMICloud", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "meta-llama/Llama-3.3-70B-Instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", - "meta-llama/Llama-3.1-8B-Instruct", - "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", - "deepseek-ai/DeepSeek-V3.1-Terminus", - "allenai/olmOCR-2-7B-1025-FP8", - "tencent/HunyuanOCR", - "google/gemini-3-flash-preview", - "google/gemini-3-pro-preview" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "GMICloud", + "name": "Google", "owners": ["{}"], - "slug": "gmicloud", - "statusPageUrl": null + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "google/gemma-3-27b-it", - "provider_name": "GMICloud", + "provider_model_id": "deepseek-ai/deepseek-v3.2-maas", + "provider_name": "Google", "provider_region": null, - "provider_slug": "gmicloud/bf16", - "quantization": "bf16", + "provider_slug": "google-vertex", + "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "seed", - "structured_outputs", - "response_format" + "response_format", + "stop", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", + "tools", + "tool_choice", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "DeepSeek: DeepSeek V3.2", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "permaslug": "deepseek/deepseek-v3.2-20251201", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null }, { - "author": "minimax", - "context_length": 196608, - "created_at": "2025-12-23T01:56:37+00:00", + "author": "google", + "context_length": 1000000, + "created_at": "2025-02-05T15:30:13.144552+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.9 + "temperature": null, + "top_p": null }, "default_stops": [], - "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", - "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "default_system": null, + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", "endpoint": { - "adapter_name": "GMICloudAdapter", - "can_abort": true, - "context_length": 196608, + "adapter_name": "GoogleVertexGeminiAdapter", + "can_abort": false, + "context_length": 1000000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": {}, + "supports_base64_video_input": true, + "supports_file_urls": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -66571,8 +67703,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "e2e6bf21-c96f-415b-bdeb-9cd90fe74eb2", + "has_completions": false, + "id": "65001dcf-7a48-4a35-9405-f419233ad7fc", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -66581,166 +67713,188 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-12-23T01:56:37+00:00", + "author": "google", + "context_length": 1000000, + "created_at": "2025-02-05T15:30:13.144552+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.9 + "temperature": null, + "top_p": null }, "default_stops": [], - "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", - "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "default_system": null, + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": null, - "name": "MiniMax: MiniMax M2.1", + "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", + "name": "Google: Gemini 2.0 Flash", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2.1", + "permaslug": "google/gemini-2.0-flash-001", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "MiniMax M2.1", - "slug": "minimax/minimax-m2.1", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.0 Flash", + "slug": "google/gemini-2.0-flash-001", + "updated_at": "2025-11-14T23:34:05.685679+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m2.1", - "model_variant_slug": "minimax/minimax-m2.1", + "model_variant_permaslug": "google/gemini-2.0-flash-001", + "model_variant_slug": "google/gemini-2.0-flash-001", "moderation_required": false, - "name": "GMICloud | minimax/minimax-m2.1", + "name": "Google | google/gemini-2.0-flash-001", "pricing": { - "completion": "0.0000012", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "image": "0.0000001", + "input_cache_read": "0.000000025", + "internal_reasoning": "0.0000004", + "prompt": "0.0000001" }, - "provider_display_name": "GMICloud", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GMICloudAdapter", - "baseUrl": "https://api.gmi-serving.com/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "GMICloud", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "meta-llama/Llama-3.3-70B-Instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", - "meta-llama/Llama-3.1-8B-Instruct", - "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", - "deepseek-ai/DeepSeek-V3.1-Terminus", - "allenai/olmOCR-2-7B-1025-FP8", - "tencent/HunyuanOCR", - "google/gemini-3-flash-preview", - "google/gemini-3-pro-preview" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "GMICloud", + "name": "Google", "owners": ["{}"], - "slug": "gmicloud", - "statusPageUrl": null + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "MiniMaxAI/MiniMax-M2.1", - "provider_name": "GMICloud", + "provider_model_id": "gemini-2.0-flash-001", + "provider_name": "Google", "provider_region": null, - "provider_slug": "gmicloud/fp8", - "quantization": "fp8", + "provider_slug": "google-vertex", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "seed", + "response_format", + "stop", + "structured_outputs", "tools", - "tool_choice", - "response_format" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": null, - "name": "MiniMax: MiniMax M2.1", + "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", + "name": "Google: Gemini 2.0 Flash", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2.1", + "permaslug": "google/gemini-2.0-flash-001", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "MiniMax M2.1", - "slug": "minimax/minimax-m2.1", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.0 Flash", + "slug": "google/gemini-2.0-flash-001", + "updated_at": "2025-11-14T23:34:05.685679+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-02-25T17:56:52.206054+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -66748,31 +67902,34 @@ }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", "endpoint": { - "adapter_name": "GMICloudAdapter", - "can_abort": true, - "context_length": 262144, + "adapter_name": "GoogleVertexGeminiAdapter", + "can_abort": false, + "context_length": 1048576, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": {}, + "supports_base64_video_input": true, + "supports_file_urls": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, - "type_function": false + "type_function": true } }, "has_chat_completions": true, - "has_completions": true, - "id": "7f7c22db-14d8-41e4-b60d-c3c6a690a1ef", + "has_completions": false, + "id": "4dc03b46-4015-495b-acff-1dcacb6c3311", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -66781,13 +67938,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-02-25T17:56:52.206054+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -66795,147 +67952,173 @@ }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", + "name": "Google: Gemini 2.0 Flash Lite", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "google/gemini-2.0-flash-lite-001", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.0 Flash Lite", + "slug": "google/gemini-2.0-flash-lite-001", + "updated_at": "2025-11-14T23:32:47.563595+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "google/gemini-2.0-flash-lite-001", + "model_variant_slug": "google/gemini-2.0-flash-lite-001", "moderation_required": false, - "name": "GMICloud | moonshotai/kimi-k2-thinking-20251106", + "name": "Google | google/gemini-2.0-flash-lite-001", "pricing": { - "completion": "0.0000012", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "image": "0.000000075", + "internal_reasoning": "0.0000003", + "prompt": "0.000000075" }, - "provider_display_name": "GMICloud", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GMICloudAdapter", - "baseUrl": "https://api.gmi-serving.com/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "GMICloud", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "meta-llama/Llama-3.3-70B-Instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", - "meta-llama/Llama-3.1-8B-Instruct", - "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", - "deepseek-ai/DeepSeek-V3.1-Terminus", - "allenai/olmOCR-2-7B-1025-FP8", - "tencent/HunyuanOCR", - "google/gemini-3-flash-preview", - "google/gemini-3-pro-preview" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "GMICloud", + "name": "Google", "owners": ["{}"], - "slug": "gmicloud", - "statusPageUrl": null + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "moonshotai/Kimi-K2-Thinking", - "provider_name": "GMICloud", + "provider_model_id": "gemini-2.0-flash-lite-001", + "provider_name": "Google", "provider_region": null, - "provider_slug": "gmicloud/int4", - "quantization": "int4", + "provider_slug": "google-vertex", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "seed", + "response_format", + "stop", "structured_outputs", - "response_format" + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", + "name": "Google: Gemini 2.0 Flash Lite", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "google/gemini-2.0-flash-lite-001", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.0 Flash Lite", + "slug": "google/gemini-2.0-flash-lite-001", + "updated_at": "2025-11-14T23:32:47.563595+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-06-17T15:01:28.103313+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -66943,24 +68126,27 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", "endpoint": { - "adapter_name": "GMICloudAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexGeminiAdapter", + "can_abort": false, + "context_length": 1048576, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { - "response_format": false, - "structured_outputs": false + "response_format": true, + "structured_outputs": true }, + "supports_base64_video_input": true, + "supports_file_urls": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -66969,8 +68155,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "8f211f03-1f5a-4dc3-83d7-33fa42966200", + "has_completions": false, + "id": "b9b095f0-6448-46bb-8e4c-0111ba2bf1b0", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -66979,13 +68165,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65535, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-06-17T15:01:28.103313+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -66993,108 +68179,135 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Gemini", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["file", "image", "text", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Google: Gemini 2.5 Flash", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "google/gemini-2.5-flash", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash", + "slug": "google/gemini-2.5-flash", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "google/gemini-2.5-flash", + "model_variant_slug": "google/gemini-2.5-flash", "moderation_required": false, - "name": "GMICloud | openai/gpt-oss-120b", + "name": "Google | google/gemini-2.5-flash", "pricing": { - "completion": "0.00000025", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "image": "0.0000003", + "input_cache_read": "0.00000003", + "internal_reasoning": "0.0000025", + "prompt": "0.0000003" }, - "provider_display_name": "GMICloud", + "provider_display_name": "Google Vertex (Global)", "provider_info": { - "adapterName": "GMICloudAdapter", - "baseUrl": "https://api.gmi-serving.com/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "GMICloud", + "displayName": "Google Vertex (Global)", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "meta-llama/Llama-3.3-70B-Instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", - "meta-llama/Llama-3.1-8B-Instruct", - "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", - "deepseek-ai/DeepSeek-V3.1-Terminus", - "allenai/olmOCR-2-7B-1025-FP8", - "tencent/HunyuanOCR", - "google/gemini-3-flash-preview", - "google/gemini-3-pro-preview" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "GMICloud", + "name": "Google", "owners": ["{}"], - "slug": "gmicloud", - "statusPageUrl": null + "slug": "google-vertex/global", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "openai/gpt-oss-120b", - "provider_name": "GMICloud", - "provider_region": null, - "provider_slug": "gmicloud/fp4", - "quantization": "fp4", + "provider_model_id": "gemini-2.5-flash", + "provider_name": "Google", + "provider_region": "global", + "provider_slug": "google-vertex/global", + "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "tool_choice", + "stop", "tools", - "structured_outputs", - "response_format" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, @@ -67105,37 +68318,37 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Gemini", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["file", "image", "text", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Google: Gemini 2.5 Flash", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "google/gemini-2.5-flash", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash", + "slug": "google/gemini-2.5-flash", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "author": "google", + "context_length": 32768, + "created_at": "2025-10-07T20:53:51+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -67143,20 +68356,22 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", "endpoint": { - "adapter_name": "GMICloudAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "GoogleVertexGeminiAdapter", + "can_abort": false, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "is_mandatory_reasoning": true, + "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -67165,23 +68380,23 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "733b1229-c8cd-405a-a958-e6e9854847fe", + "has_completions": false, + "id": "9fc81a95-52d2-4a94-adc5-f99869ab1351", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 1000, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "author": "google", + "context_length": 32768, + "created_at": "2025-10-07T20:53:51+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -67189,169 +68404,198 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Gemini", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", - "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", + "output_modalities": ["image", "text"], + "permaslug": "google/gemini-2.5-flash-image", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash Image (Nano Banana)", + "slug": "google/gemini-2.5-flash-image", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "google/gemini-2.5-flash-image", + "model_variant_slug": "google/gemini-2.5-flash-image", "moderation_required": false, - "name": "GMICloud | openai/gpt-oss-20b", + "name": "Google | google/gemini-2.5-flash-image", "pricing": { - "completion": "0.00000015", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "image": "0.0000003", + "image_output": "0.00003", + "input_cache_read": "0.00000003", + "internal_reasoning": "0.0000025", + "prompt": "0.0000003" }, - "provider_display_name": "GMICloud", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GMICloudAdapter", - "baseUrl": "https://api.gmi-serving.com/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "GMICloud", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "meta-llama/Llama-3.3-70B-Instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", - "meta-llama/Llama-3.1-8B-Instruct", - "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", - "deepseek-ai/DeepSeek-V3.1-Terminus", - "allenai/olmOCR-2-7B-1025-FP8", - "tencent/HunyuanOCR", - "google/gemini-3-flash-preview", - "google/gemini-3-pro-preview" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "GMICloud", + "name": "Google", "owners": ["{}"], - "slug": "gmicloud", - "statusPageUrl": null + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "openai/gpt-oss-20b", - "provider_name": "GMICloud", - "provider_region": null, - "provider_slug": "gmicloud/fp4", - "quantization": "fp4", + "provider_model_id": "gemini-2.5-flash-image", + "provider_name": "Google", + "provider_region": "global", + "provider_slug": "google-vertex", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "seed", - "tool_choice", - "tools", - "structured_outputs", - "response_format" + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Gemini", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", - "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", + "output_modalities": ["image", "text"], + "permaslug": "google/gemini-2.5-flash-image", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash Image (Nano Banana)", + "slug": "google/gemini-2.5-flash-image", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 1048576, + "created_at": "2025-07-22T16:04:36.283638+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "endpoint": { - "adapter_name": "GMICloudAdapter", - "can_abort": true, - "context_length": 262144, + "adapter_name": "GoogleVertexGeminiAdapter", + "can_abort": false, + "context_length": 1048576, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_base64_video_input": true, + "supports_file_urls": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -67360,8 +68604,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "9d33dd86-b592-406b-965f-f91977020855", + "has_completions": false, + "id": "ebdbef25-737d-4ac3-9e98-fd3928724e45", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -67370,173 +68614,218 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65535, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 1048576, + "created_at": "2025-07-22T16:04:36.283638+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Gemini", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "name": "Google: Gemini 2.5 Flash Lite", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "permaslug": "google/gemini-2.5-flash-lite", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", + "short_name": "Gemini 2.5 Flash Lite", + "slug": "google/gemini-2.5-flash-lite", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", + "model_variant_permaslug": "google/gemini-2.5-flash-lite", + "model_variant_slug": "google/gemini-2.5-flash-lite", "moderation_required": false, - "name": "GMICloud | qwen/qwen3-next-80b-a3b-instruct-2509", + "name": "Google | google/gemini-2.5-flash-lite", "pricing": { - "completion": "0.0000015", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "image": "0.0000001", + "input_cache_read": "0.00000001", + "internal_reasoning": "0.0000004", + "prompt": "0.0000001" }, - "provider_display_name": "GMICloud", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GMICloudAdapter", - "baseUrl": "https://api.gmi-serving.com/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "GMICloud", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "meta-llama/Llama-3.3-70B-Instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", - "meta-llama/Llama-3.1-8B-Instruct", - "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", - "deepseek-ai/DeepSeek-V3.1-Terminus", - "allenai/olmOCR-2-7B-1025-FP8", - "tencent/HunyuanOCR", - "google/gemini-3-flash-preview", - "google/gemini-3-pro-preview" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "GMICloud", + "name": "Google", "owners": ["{}"], - "slug": "gmicloud", - "statusPageUrl": null + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", - "provider_name": "GMICloud", + "provider_model_id": "gemini-2.5-flash-lite", + "provider_name": "Google", "provider_region": null, - "provider_slug": "gmicloud/fp8", - "quantization": "fp8", + "provider_slug": "google-vertex", + "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", + "stop", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Gemini", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "name": "Google: Gemini 2.5 Flash Lite", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "permaslug": "google/gemini-2.5-flash-lite", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", + "short_name": "Gemini 2.5 Flash Lite", + "slug": "google/gemini-2.5-flash-lite", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-23T23:04:47+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-09-25T17:01:26.198818+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "endpoint": { - "adapter_name": "GMICloudAdapter", - "can_abort": true, - "context_length": 262144, + "adapter_name": "GoogleVertexGeminiAdapter", + "can_abort": false, + "context_length": 1048576, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_base64_video_input": true, + "supports_file_urls": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -67545,8 +68834,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "b90705c3-9b23-4cc2-8366-59ba6666980d", + "has_completions": false, + "id": "6e63b201-b2cc-45d9-b0c0-da687a96efa0", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -67555,180 +68844,219 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65535, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:47+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-09-25T17:01:26.198818+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Gemini", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", + "permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "short_name": "Gemini 2.5 Flash Lite Preview 09-2025", + "slug": "google/gemini-2.5-flash-lite-preview-09-2025", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", + "model_variant_permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", + "model_variant_slug": "google/gemini-2.5-flash-lite-preview-09-2025", "moderation_required": false, - "name": "GMICloud | qwen/qwen3-vl-235b-a22b-instruct", + "name": "Google | google/gemini-2.5-flash-lite-preview-09-2025", "pricing": { - "completion": "0.0000014", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "image": "0.0000001", + "input_cache_read": "0.00000001", + "internal_reasoning": "0.0000004", + "prompt": "0.0000001" }, - "provider_display_name": "GMICloud", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GMICloudAdapter", - "baseUrl": "https://api.gmi-serving.com/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "GMICloud", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "meta-llama/Llama-3.3-70B-Instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", - "meta-llama/Llama-3.1-8B-Instruct", - "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", - "deepseek-ai/DeepSeek-V3.1-Terminus", - "allenai/olmOCR-2-7B-1025-FP8", - "tencent/HunyuanOCR", - "google/gemini-3-flash-preview", - "google/gemini-3-pro-preview" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "GMICloud", + "name": "Google", "owners": ["{}"], - "slug": "gmicloud", - "statusPageUrl": null + "slug": "google-vertex", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "Qwen/Qwen3-VL-235B-A22B-Instruct-FP8", - "provider_name": "GMICloud", - "provider_region": null, - "provider_slug": "gmicloud/fp8", - "quantization": "fp8", + "provider_model_id": "gemini-2.5-flash-lite-preview-09-2025", + "provider_name": "Google", + "provider_region": "global", + "provider_slug": "google-vertex", + "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", + "stop", "tools", - "tool_choice", - "structured_outputs", - "response_format" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Gemini", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", + "permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "short_name": "Gemini 2.5 Flash Lite Preview 09-2025", + "slug": "google/gemini-2.5-flash-lite-preview-09-2025", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 202752, - "created_at": "2025-12-22T04:33:34.884504+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-06-17T14:12:24+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", "endpoint": { - "adapter_name": "GMICloudAdapter", - "can_abort": true, - "context_length": 202752, + "adapter_name": "GoogleVertexGeminiAdapter", + "can_abort": false, + "context_length": 1048576, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_base64_video_input": true, + "supports_file_urls": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -67737,195 +69065,225 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "265fd495-fcf1-4b2e-8adc-d689c1814e93", + "has_completions": false, + "id": "43106252-bd69-498f-84f5-1fde678783f7", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 300, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 200000, - "created_at": "2025-12-22T04:33:34.884504+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-06-17T14:12:24+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "Google: Gemini 2.5 Pro", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", + "permaslug": "google/gemini-2.5-pro", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "Gemini 2.5 Pro", + "slug": "google/gemini-2.5-pro", + "updated_at": "2026-01-08T23:55:54.79011+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.7-20251222", - "model_variant_slug": "z-ai/glm-4.7", + "model_variant_permaslug": "google/gemini-2.5-pro", + "model_variant_slug": "google/gemini-2.5-pro", "moderation_required": false, - "name": "GMICloud | z-ai/glm-4.7-20251222", + "name": "Google | google/gemini-2.5-pro", "pricing": { - "completion": "0.000002", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "image": "0.00000125", + "input_cache_read": "0.000000125", + "internal_reasoning": "0.00001", + "prompt": "0.00000125" }, - "provider_display_name": "GMICloud", + "provider_display_name": "Google Vertex (Global)", "provider_info": { - "adapterName": "GMICloudAdapter", - "baseUrl": "https://api.gmi-serving.com/v1", + "adapterName": "GoogleVertexGeminiAdapter", + "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.gmicloud.ai/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.gmicloud.ai/terms-and-conditions", + "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", + "requiresUserIDs": true, + "retainsPrompts": false, + "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "GMICloud", + "displayName": "Google Vertex (Global)", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://gmicloud.ai/&size=256" + "url": "/images/icons/GoogleVertex.svg" }, "ignoredProviderModels": [ - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", - "meta-llama/Llama-3.3-70B-Instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507-FP8", - "meta-llama/Llama-3.1-8B-Instruct", - "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8", - "deepseek-ai/DeepSeek-V3.1-Terminus", - "allenai/olmOCR-2-7B-1025-FP8", - "tencent/HunyuanOCR", - "google/gemini-3-flash-preview", - "google/gemini-3-pro-preview" + "gemini-2.5-pro-exp-03-25", + "gemini-2.0-flash-exp", + "gemini-1.5-flash-002", + "gemini-2.0-flash-lite-001", + "gemini-2.5-flash-lite-preview-06-17", + "gemini-2.5-flash-lite", + "gemini-2.0-flash-001", + "llama-4-scout-17b-16e-instruct-maas", + "qwen3-235b-a22b-instruct-2507-maas", + "gemini-2.5-flash", + "gemini-2.5-flash-image-preview", + "llama-4-maverick-17b-128e-instruct-maas", + "llama-3.3-70b-instruct-maas", + "claude-3-5-haiku@20241022", + "qwen3-coder-480b-a35b-instruct-maas", + "gemini-1.5-pro-002", + "gemini-2.5-pro-preview-06-05", + "gemini-2.5-pro-preview-05-06", + "gemini-2.5-pro", + "deepseek-r1-0528-maas", + "claude-3-7-sonnet@20250219", + "claude-3-5-sonnet-v2@20241022", + "claude-sonnet-4@20250514", + "claude-opus-4-1@20250805", + "claude-opus-4@20250514", + "claude-3-haiku@20240307", + "claude-3-5-sonnet@20240620", + "claude-3-opus@20240229", + "gemini-2.5-flash-lite-preview-09-2025", + "gemini-2.5-flash-preview-09-2025", + "gemini-2.5-flash-image", + "claude-sonnet-4-5@20250929", + "claude-haiku-4-5@20251001", + "minimax/minimax-m2-maas", + "gemini-3-pro-preview", + "gemini-3-pro-image-preview", + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "GMICloud", + "name": "Google", "owners": ["{}"], - "slug": "gmicloud", - "statusPageUrl": null + "slug": "google-vertex/global", + "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "zai-org/GLM-4.7-FP8", - "provider_name": "GMICloud", - "provider_region": null, - "provider_slug": "gmicloud/fp8", - "quantization": "fp8", + "provider_model_id": "gemini-2.5-pro", + "provider_name": "Google", + "provider_region": "global", + "provider_slug": "google-vertex/global", + "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "structured_outputs", - "response_format" + "stop", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, - "variable_pricings": [], + "supports_tool_parameters": true, + "variable_pricings": [ + { + "completions": "0.000015", + "input_cache_read": "0.00000025", + "input_cache_write": "0.00000075", + "prompt": "0.0000025", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "Google: Gemini 2.5 Pro", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", + "permaslug": "google/gemini-2.5-pro", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "Gemini 2.5 Pro", + "slug": "google/gemini-2.5-pro", + "updated_at": "2026-01-08T23:55:54.79011+00:00", "warning_message": null - } - ], - "name": "GMICloud", - "slug": "gmicloud" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Google Vertex", - "headquarters": "US", - "icon": { - "url": "/images/icons/GoogleVertex.svg" - }, - "models": [ + }, { - "author": "anthropic", - "context_length": 200000, - "created_at": "2024-03-13T00:00:00+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 1048576, + "created_at": "2025-05-07T00:41:53+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "GoogleVertexGeminiAdapter", "can_abort": false, - "context_length": 200000, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -67935,6 +69293,12 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_base64_video_input": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -67944,59 +69308,71 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "c8ee28fa-00f7-4a61-adc8-f2272f7e0154", + "id": "9d2cac4d-81d4-4e67-ac7a-6c73040655ee", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": 65535, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 200000, - "created_at": "2024-03-13T00:00:00+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 1048576, + "created_at": "2025-05-07T00:41:53+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal", - "features": {}, - "group": "Claude", - "has_text_output": true, - "hf_slug": null, - "hf_updated_at": null, + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Gemini", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", - "name": "Anthropic: Claude 3 Haiku", + "model_version_group_id": null, + "name": "Google: Gemini 2.5 Pro Preview 05-06", "output_modalities": ["text"], - "permaslug": "anthropic/claude-3-haiku", - "reasoning_config": null, + "permaslug": "google/gemini-2.5-pro-preview-03-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Claude 3 Haiku", - "slug": "anthropic/claude-3-haiku", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 2.5 Pro Preview 05-06", + "slug": "google/gemini-2.5-pro-preview-05-06", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-3-haiku", - "model_variant_slug": "anthropic/claude-3-haiku", + "model_variant_permaslug": "google/gemini-2.5-pro-preview-03-25", + "model_variant_slug": "google/gemini-2.5-pro-preview-05-06", "moderation_required": false, - "name": "Google | anthropic/claude-3-haiku", + "name": "Google | google/gemini-2.5-pro-preview-03-25", "pricing": { - "completion": "0.00000125", + "completion": "0.00001", "discount": 0, - "image": "0.0004", - "image_output": "0", - "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "image": "0.00000125", + "input_cache_read": "0.000000125", + "internal_reasoning": "0.00001", + "prompt": "0.00000125" }, "provider_display_name": "Google Vertex", "provider_info": { @@ -68056,7 +69432,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -68066,61 +69443,81 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-3-haiku@20240307", + "provider_model_id": "gemini-2.5-pro-preview-05-06", "provider_name": "Google", "provider_region": null, "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", - "top_k", + "seed", "stop", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "completions": "0.000015", + "input_cache_read": "0.000000625", + "input_cache_write": "0.00000075", + "prompt": "0.0000025", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, - "features": {}, - "group": "Claude", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Gemini", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", - "name": "Anthropic: Claude 3 Haiku", + "model_version_group_id": null, + "name": "Google: Gemini 2.5 Pro Preview 05-06", "output_modalities": ["text"], - "permaslug": "anthropic/claude-3-haiku", - "reasoning_config": null, + "permaslug": "google/gemini-2.5-pro-preview-03-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Claude 3 Haiku", - "slug": "anthropic/claude-3-haiku", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 2.5 Pro Preview 05-06", + "slug": "google/gemini-2.5-pro-preview-05-06", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "anthropic", - "context_length": 200000, - "created_at": "2024-11-04T00:00:00+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "google", + "context_length": 1048576, + "created_at": "2025-06-05T15:27:37.538116+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.\n\nThis makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.\n\nThis model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.\n", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "GoogleVertexGeminiAdapter", "can_abort": false, - "context_length": 200000, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -68130,6 +69527,12 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -68139,74 +69542,66 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "8636daa9-bd75-466e-8440-1aa27df5942a", + "id": "018040ae-9cda-43cd-8813-d8ca83f6c7ed", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 200000, - "created_at": "2024-11-04T00:00:00+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "google", + "context_length": 1048576, + "created_at": "2025-06-05T15:27:37.538116+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic tasks such as chat interactions and immediate coding suggestions.\n\nThis makes it highly suitable for environments that demand both speed and precision, such as software development, customer service bots, and data management systems.\n\nThis model is currently pointing to [Claude 3.5 Haiku (2024-10-22)](/anthropic/claude-3-5-haiku-20241022).", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.\n", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Claude", + "group": "Gemini", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["file", "image", "text", "audio"], "instruct_type": null, - "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", - "name": "Anthropic: Claude 3.5 Haiku", + "model_version_group_id": null, + "name": "Google: Gemini 2.5 Pro Preview 06-05", "output_modalities": ["text"], - "permaslug": "anthropic/claude-3-5-haiku", + "permaslug": "google/gemini-2.5-pro-preview-06-05", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude 3.5 Haiku", - "slug": "anthropic/claude-3.5-haiku", - "updated_at": "2025-12-05T21:54:21.40359+00:00", + "short_name": "Gemini 2.5 Pro Preview 06-05", + "slug": "google/gemini-2.5-pro-preview", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-3-5-haiku", - "model_variant_slug": "anthropic/claude-3.5-haiku", + "model_variant_permaslug": "google/gemini-2.5-pro-preview-06-05", + "model_variant_slug": "google/gemini-2.5-pro-preview", "moderation_required": false, - "name": "Google | anthropic/claude-3-5-haiku", + "name": "Google | google/gemini-2.5-pro-preview-06-05", "pricing": { - "completion": "0.000004", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000008", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "image": "0.00000125", + "input_cache_read": "0.000000125", + "internal_reasoning": "0.00001", + "prompt": "0.00000125" }, "provider_display_name": "Google Vertex", "provider_info": { @@ -68266,7 +69661,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -68276,68 +69672,84 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-3-5-haiku@20241022", + "provider_model_id": "gemini-2.5-pro-preview-06-05", "provider_name": "Google", - "provider_region": null, + "provider_region": "global", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", - "top_k", + "seed", "stop", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "completions": "0.000015", + "input_cache_read": "0.000000625", + "input_cache_write": "0.00000075", + "prompt": "0.0000025", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Claude", + "group": "Gemini", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["file", "image", "text", "audio"], "instruct_type": null, - "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", - "name": "Anthropic: Claude 3.5 Haiku", + "model_version_group_id": null, + "name": "Google: Gemini 2.5 Pro Preview 06-05", "output_modalities": ["text"], - "permaslug": "anthropic/claude-3-5-haiku", + "permaslug": "google/gemini-2.5-pro-preview-06-05", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude 3.5 Haiku", - "slug": "anthropic/claude-3.5-haiku", - "updated_at": "2025-12-05T21:54:21.40359+00:00", + "short_name": "Gemini 2.5 Pro Preview 06-05", + "slug": "google/gemini-2.5-pro-preview", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "anthropic", - "context_length": 200000, - "created_at": "2024-11-04T00:00:00+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 1048576, + "created_at": "2025-12-17T15:57:58+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", + "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.\n\nThe model supports a 1M token context window and multimodal inputs including text, images, audio, video, and PDFs, with text output. It includes configurable reasoning via thinking levels (minimal, low, medium, high), structured output, tool use, and automatic context caching. Gemini 3 Flash Preview is optimized for users who want strong reasoning and agentic behavior without the cost or latency of full scale frontier models.", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "GoogleVertexGeminiAdapter", "can_abort": false, - "context_length": 200000, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -68347,78 +69759,87 @@ "training": false }, "features": { + "supports_base64_video_input": true, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": true }, "has_chat_completions": true, "has_completions": false, - "id": "e0420c5b-5dc7-4577-b2a7-1dfa64c5b569", + "id": "d06bdb18-3cc2-46b0-bf23-922e485dc255", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 275, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 65535, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 200000, - "created_at": "2024-11-04T00:00:00+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 1048576, + "created_at": "2025-12-17T15:57:58+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Claude 3.5 Haiku features enhancements across all skill sets including coding, tool use, and reasoning. As the fastest model in the Anthropic lineup, it offers rapid response times suitable for applications that require high interactivity and low latency, such as user-facing chatbots and on-the-fly code completions. It also excels in specialized tasks like data extraction and real-time content moderation, making it a versatile tool for a broad range of industries.\n\nIt does not support image inputs.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/3-5-models-and-computer-use)", + "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.\n\nThe model supports a 1M token context window and multimodal inputs including text, images, audio, video, and PDFs, with text output. It includes configurable reasoning via thinking levels (minimal, low, medium, high), structured output, tool use, and automatic context caching. Gemini 3 Flash Preview is optimized for users who want strong reasoning and agentic behavior without the cost or latency of full scale frontier models.", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Claude", + "group": "Gemini", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", - "name": "Anthropic: Claude 3.5 Haiku (2024-10-22)", + "model_version_group_id": null, + "name": "Google: Gemini 3 Flash Preview", "output_modalities": ["text"], - "permaslug": "anthropic/claude-3-5-haiku-20241022", + "permaslug": "google/gemini-3-flash-preview-20251217", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude 3.5 Haiku (2024-10-22)", - "slug": "anthropic/claude-3.5-haiku-20241022", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 3 Flash Preview", + "slug": "google/gemini-3-flash-preview", + "updated_at": "2025-12-17T16:17:44.159277+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-3-5-haiku-20241022", - "model_variant_slug": "anthropic/claude-3.5-haiku-20241022", + "model_variant_permaslug": "google/gemini-3-flash-preview-20251217", + "model_variant_slug": "google/gemini-3-flash-preview", "moderation_required": false, - "name": "Google | anthropic/claude-3-5-haiku-20241022", + "name": "Google | google/gemini-3-flash-preview-20251217", "pricing": { - "completion": "0.000004", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000008", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "image": "0.0000005", + "input_cache_read": "0.00000005", + "internal_reasoning": "0.000003", + "prompt": "0.0000005" }, "provider_display_name": "Google Vertex", "provider_info": { @@ -68478,7 +69899,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -68488,59 +69910,66 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-3-5-haiku@20241022", + "provider_model_id": "gemini-3-flash-preview", "provider_name": "Google", - "provider_region": null, + "provider_region": "global", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", - "top_k", + "seed", + "response_format", "stop", - "tools", - "tool_choice" + "structured_outputs", + "tool_choice", + "tools" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Claude", + "group": "Gemini", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": "028ec497-a034-40fd-81fe-f51d0a0c640c", - "name": "Anthropic: Claude 3.5 Haiku (2024-10-22)", + "model_version_group_id": null, + "name": "Google: Gemini 3 Flash Preview", "output_modalities": ["text"], - "permaslug": "anthropic/claude-3-5-haiku-20241022", + "permaslug": "google/gemini-3-flash-preview-20251217", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude 3.5 Haiku (2024-10-22)", - "slug": "anthropic/claude-3.5-haiku-20241022", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 3 Flash Preview", + "slug": "google/gemini-3-flash-preview", + "updated_at": "2025-12-17T16:17:44.159277+00:00", "warning_message": null }, { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-02-24T18:35:10.00008+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-11-18T14:04:28+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -68548,11 +69977,11 @@ }, "default_stops": [], "default_system": null, - "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", + "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "GoogleVertexGeminiAdapter", "can_abort": false, - "context_length": 200000, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -68562,32 +69991,39 @@ "training": false }, "features": { - "supported_parameters": {}, + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_base64_video_input": true, + "supports_file_urls": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": true }, "has_chat_completions": true, "has_completions": false, - "id": "1c9b8776-e266-4efb-b5ba-19a6753e7736", + "id": "ca4e1f52-1f15-4258-a4eb-d6b56c0c4057", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 300, "limit_rpm_cf": null, - "max_completion_tokens": 64000, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-02-24T18:35:10.00008+00:00", + "author": "google", + "context_length": 1048576, + "created_at": "2025-11-18T14:04:28+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -68595,51 +70031,50 @@ }, "default_stops": [], "default_system": null, - "description": "Claude 3.7 Sonnet is an advanced large language model with improved reasoning, coding, and problem-solving capabilities. It introduces a hybrid reasoning approach, allowing users to choose between rapid responses and extended, step-by-step processing for complex tasks. The model demonstrates notable improvements in coding, particularly in front-end development and full-stack updates, and excels in agentic workflows, where it can autonomously navigate multi-step processes. \n\nClaude 3.7 Sonnet maintains performance parity with its predecessor in standard mode while offering an extended reasoning mode for enhanced accuracy in math, coding, and instruction-following tasks.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-3-7-sonnet)", + "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Claude", + "group": "Gemini", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": "30636d20-cda3-4a59-aa0c-1a5b6efba072", - "name": "Anthropic: Claude 3.7 Sonnet", + "model_version_group_id": null, + "name": "Google: Gemini 3 Pro Preview", "output_modalities": ["text"], - "permaslug": "anthropic/claude-3-7-sonnet-20250219", + "permaslug": "google/gemini-3-pro-preview-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude 3.7 Sonnet", - "slug": "anthropic/claude-3.7-sonnet", - "updated_at": "2025-12-05T21:54:07.586262+00:00", + "short_name": "Gemini 3 Pro Preview", + "slug": "google/gemini-3-pro-preview", + "updated_at": "2026-01-27T15:11:59.578671+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-3-7-sonnet-20250219", - "model_variant_slug": "anthropic/claude-3.7-sonnet", + "model_variant_permaslug": "google/gemini-3-pro-preview-20251117", + "model_variant_slug": "google/gemini-3-pro-preview", "moderation_required": false, - "name": "Google | anthropic/claude-3-7-sonnet-20250219", + "name": "Google | google/gemini-3-pro-preview-20251117", "pricing": { - "completion": "0.000015", + "completion": "0.000012", "discount": 0, - "image": "0.0048", - "image_output": "0", - "input_cache_read": "0.0000003", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "image": "0.000002", + "input_cache_read": "0.0000002", + "internal_reasoning": "0.000012", + "prompt": "0.000002" }, "provider_display_name": "Google Vertex", "provider_info": { @@ -68699,71 +70134,86 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, "name": "Google", "owners": ["{}"], - "slug": "google-vertex/us", + "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-3-7-sonnet@20250219", + "provider_model_id": "gemini-3-pro-preview", "provider_name": "Google", - "provider_region": null, - "provider_slug": "google-vertex/us", + "provider_region": "global", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", - "top_p", "temperature", + "top_p", + "seed", "stop", - "reasoning", - "include_reasoning", "tools", "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "completions": "0.000018", + "input_cache_read": "0.0000004", + "input_cache_write": "0.00000075", + "prompt": "0.000004", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Claude", + "group": "Gemini", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": "30636d20-cda3-4a59-aa0c-1a5b6efba072", - "name": "Anthropic: Claude 3.7 Sonnet", + "model_version_group_id": null, + "name": "Google: Gemini 3 Pro Preview", "output_modalities": ["text"], - "permaslug": "anthropic/claude-3-7-sonnet-20250219", + "permaslug": "google/gemini-3-pro-preview-20251117", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude 3.7 Sonnet", - "slug": "anthropic/claude-3.7-sonnet", - "updated_at": "2025-12-05T21:54:07.586262+00:00", + "short_name": "Gemini 3 Pro Preview", + "slug": "google/gemini-3-pro-preview", + "updated_at": "2026-01-27T15:11:59.578671+00:00", "warning_message": null }, { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-10-15T17:00:38+00:00", + "author": "google", + "context_length": 65536, + "created_at": "2025-11-20T15:49:57.064095+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -68771,11 +70221,11 @@ }, "default_stops": [], "default_system": null, - "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line; enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", + "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "GoogleVertexGeminiAdapter", "can_abort": false, - "context_length": 200000, + "context_length": 65536, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -68785,9 +70235,8 @@ "training": false }, "features": { - "supported_parameters": {}, + "is_mandatory_reasoning": true, "supports_input_audio": false, - "supports_native_web_search": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -68797,22 +70246,22 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "8a5e69a1-ea0d-4af6-899e-8b7e1c93a7a9", + "id": "be4c8ae2-b6e5-4e54-b556-733445e52f1e", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 250, "limit_rpm_cf": null, - "max_completion_tokens": 64000, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-10-15T17:00:38+00:00", + "author": "google", + "context_length": 65536, + "created_at": "2025-11-20T15:49:57.064095+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -68820,16 +70269,18 @@ }, "default_stops": [], "default_system": null, - "description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance across reasoning, coding, and computer-use tasks, Haiku 4.5 brings frontier-level capability to real-time and high-volume applications.\n\nIt introduces extended thinking to the Haiku line; enabling controllable reasoning depth, summarized or interleaved thought output, and tool-assisted workflows with full support for coding, bash, web search, and computer-use tools. Scoring >73% on SWE-bench Verified, Haiku 4.5 ranks among the world’s best coding models while maintaining exceptional responsiveness for sub-agents, parallelized execution, and scaled deployment.", + "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Claude", + "group": "Gemini", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, @@ -68837,34 +70288,32 @@ "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Haiku 4.5", - "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.5-haiku-20251001", + "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", + "output_modalities": ["image", "text"], + "permaslug": "google/gemini-3-pro-image-preview-20251120", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude Haiku 4.5", - "slug": "anthropic/claude-haiku-4.5", - "updated_at": "2025-12-05T21:53:18.541396+00:00", + "short_name": "Nano Banana Pro (Gemini 3 Pro Image Preview)", + "slug": "google/gemini-3-pro-image-preview", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-4.5-haiku-20251001", - "model_variant_slug": "anthropic/claude-haiku-4.5", + "model_variant_permaslug": "google/gemini-3-pro-image-preview-20251120", + "model_variant_slug": "google/gemini-3-pro-image-preview", "moderation_required": false, - "name": "Google | anthropic/claude-4.5-haiku-20251001", + "name": "Google | google/gemini-3-pro-image-preview-20251120", "pricing": { - "completion": "0.000005", + "completion": "0.000012", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000001", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "image": "0.000002", + "image_output": "0.00012", + "input_cache_read": "0.0000002", + "internal_reasoning": "0.000012", + "prompt": "0.000002" }, "provider_display_name": "Google Vertex", "provider_info": { @@ -68924,7 +70373,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -68934,37 +70384,39 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-haiku-4-5@20251001", + "provider_model_id": "gemini-3-pro-image-preview", "provider_name": "Google", "provider_region": "global", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", - "top_p", "temperature", + "top_p", + "seed", + "response_format", "stop", - "reasoning", - "include_reasoning", - "tools", - "tool_choice", - "top_k" + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Claude", + "group": "Gemini", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, @@ -68972,36 +70424,32 @@ "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Haiku 4.5", - "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.5-haiku-20251001", + "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", + "output_modalities": ["image", "text"], + "permaslug": "google/gemini-3-pro-image-preview-20251120", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Claude Haiku 4.5", - "slug": "anthropic/claude-haiku-4.5", - "updated_at": "2025-12-05T21:53:18.541396+00:00", + "short_name": "Nano Banana Pro (Gemini 3 Pro Image Preview)", + "slug": "google/gemini-3-pro-image-preview", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-05-22T16:27:25.029961+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 128000, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", + "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 200000, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -69011,7 +70459,10 @@ "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -69021,77 +70472,57 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "377a8014-1242-4923-ae2c-20946f6c18d8", + "id": "a8eea0ee-a665-45dd-8e0a-b8f6cb8edf43", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32000, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-05-22T16:27:25.029961+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in software engineering, achieving leading results on SWE-bench (72.5%) and Terminal-bench (43.2%). Opus 4 supports extended, agentic workflows, handling thousands of task steps continuously for hours without degradation. \n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Claude", + "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Anthropic: Claude Opus 4", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", + "name": "Meta: Llama 3.1 405B Instruct", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4-opus-20250522", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.1-405b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Claude Opus 4", - "slug": "anthropic/claude-opus-4", + "short_name": "Llama 3.1 405B Instruct", + "slug": "meta-llama/llama-3.1-405b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-4-opus-20250522", - "model_variant_slug": "anthropic/claude-opus-4", + "model_variant_permaslug": "meta-llama/llama-3.1-405b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-405b-instruct", "moderation_required": false, - "name": "Google | anthropic/claude-4-opus-20250522", + "name": "Google | meta-llama/llama-3.1-405b-instruct", "pricing": { - "completion": "0.000075", + "completion": "0.000016", "discount": 0, - "image": "0.024", - "image_output": "0", - "input_cache_read": "0.0000015", - "internal_reasoning": "0", - "prompt": "0.000015", - "request": "0", - "web_search": "0" + "prompt": "0.000005" }, - "provider_display_name": "Google Vertex (Europe)", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -69102,7 +70533,7 @@ "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex (Europe)", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, @@ -69147,82 +70578,73 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, "name": "Google", "owners": ["{}"], - "slug": "google-vertex/europe", + "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-opus-4@20250514", + "provider_model_id": "meta/llama-3.1-405b-instruct-maas", "provider_name": "Google", - "provider_region": "europe-west4", - "provider_slug": "google-vertex/europe", + "provider_region": "us-central1", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", - "top_p", "temperature", + "top_p", + "seed", "stop", - "reasoning", - "include_reasoning", + "frequency_penalty", + "presence_penalty", + "top_k", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Claude", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Anthropic: Claude Opus 4", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", + "name": "Meta: Llama 3.1 405B Instruct", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4-opus-20250522", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.1-405b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Claude Opus 4", - "slug": "anthropic/claude-opus-4", + "short_name": "Llama 3.1 405B Instruct", + "slug": "meta-llama/llama-3.1-405b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-08-05T16:33:11.634562+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 128000, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "GoogleVertexGeminiAdapter", "can_abort": false, - "context_length": 200000, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -69232,8 +70654,10 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": false, + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -69243,75 +70667,55 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "fd53ee1e-86ae-40b4-ba95-79bdce79051c", + "id": "4b49bf33-fc17-4e36-97ec-a163b306d8d9", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-08-05T16:33:11.634562+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains in multi-file code refactoring, debugging precision, and detail-oriented reasoning. The model supports extended thinking up to 64K tokens and is optimized for tasks involving research, data analysis, and tool-assisted reasoning.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Claude", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Anthropic: Claude Opus 4.1", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.1-opus-20250805", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Claude Opus 4.1", - "slug": "anthropic/claude-opus-4.1", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-4.1-opus-20250805", - "model_variant_slug": "anthropic/claude-opus-4.1", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "Google | anthropic/claude-4.1-opus-20250805", + "name": "Google | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.000075", + "completion": "0.00000072", "discount": 0, - "image": "0.024", - "image_output": "0", - "input_cache_read": "0.0000015", - "internal_reasoning": "0", - "prompt": "0.000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000072" }, - "provider_display_name": "Google Vertex (Global)", + "provider_display_name": "Google Vertex", "provider_info": { "adapterName": "GoogleVertexGeminiAdapter", "baseUrl": "not_used", @@ -69324,7 +70728,7 @@ "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex (Global)", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, @@ -69369,82 +70773,69 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, "name": "Google", "owners": ["{}"], - "slug": "google-vertex/global", + "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-opus-4-1@20250805", + "provider_model_id": "llama-3.3-70b-instruct-maas", "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex/global", + "provider_region": null, + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ "max_tokens", - "top_p", "temperature", + "top_p", + "seed", + "response_format", "stop", - "reasoning", - "include_reasoning", - "tools", - "tool_choice" + "frequency_penalty", + "presence_penalty" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Claude", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Anthropic: Claude Opus 4.1", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.1-opus-20250805", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Claude Opus 4.1", - "slug": "anthropic/claude-opus-4.1", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-11-24T18:56:20+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 524288, + "created_at": "2025-04-05T19:37:02.129674+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection. The model is designed to operate efficiently across varied effort levels, enabling developers to trade off speed, depth, and token usage depending on task requirements. It comes with a new parameter to control token efficiency, which can be accessed using the OpenRouter Verbosity parameter with low, medium, or high.\n\nOpus 4.5 supports advanced tool use, extended context management, and coordinated multi-agent setups, making it well-suited for autonomous research, debugging, multi-step planning, and spreadsheet/browser manipulation. It delivers substantial gains in structured reasoning, execution reliability, and alignment compared to prior Opus generations, while reducing token overhead and improving performance on long-running tasks.", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 200000, + "context_length": 524288, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -69454,8 +70845,10 @@ "training": false }, "features": { - "supports_input_audio": false, - "supports_native_web_search": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -69465,78 +70858,57 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "f28aad62-67fa-4156-a139-0b80b28bc08f", + "id": "3479a1df-3f25-46b3-9e3d-bb323bee5d23", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 130, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 64000, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 200000, - "created_at": "2025-11-24T18:56:20+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 1048576, + "created_at": "2025-04-05T19:37:02.129674+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and reasoning benchmarks, and improved robustness to prompt injection. The model is designed to operate efficiently across varied effort levels, enabling developers to trade off speed, depth, and token usage depending on task requirements. It comes with a new parameter to control token efficiency, which can be accessed using the OpenRouter Verbosity parameter with low, medium, or high.\n\nOpus 4.5 supports advanced tool use, extended context management, and coordinated multi-agent setups, making it well-suited for autonomous research, debugging, multi-step planning, and spreadsheet/browser manipulation. It delivers substantial gains in structured reasoning, execution reliability, and alignment compared to prior Opus generations, while reducing token overhead and improving performance on long-running tasks.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Claude", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Opus 4.5", + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.5-opus-20251124", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Claude Opus 4.5", - "slug": "anthropic/claude-opus-4.5", - "updated_at": "2025-12-05T21:53:06.931607+00:00", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-4.5-opus-20251124", - "model_variant_slug": "anthropic/claude-opus-4.5", + "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "model_variant_slug": "meta-llama/llama-4-maverick", "moderation_required": false, - "name": "Google | anthropic/claude-4.5-opus-20251124", + "name": "Google | meta-llama/llama-4-maverick-17b-128e-instruct", "pricing": { - "completion": "0.000025", + "completion": "0.00000115", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000005", - "internal_reasoning": "0", - "prompt": "0.000005", - "request": "0", - "web_search": "0.01" + "prompt": "0.00000035" }, "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -69592,7 +70964,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -69602,73 +70975,62 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-opus-4-5@20251101", + "provider_model_id": "meta/llama-4-maverick-17b-128e-instruct-maas", "provider_name": "Google", - "provider_region": "global", + "provider_region": "us-east5", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", + "top_p", + "seed", "stop", - "reasoning", - "include_reasoning", - "tool_choice", + "frequency_penalty", + "presence_penalty", + "top_k", "tools", - "verbosity" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Claude", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Opus 4.5", + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.5-opus-20251124", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Claude Opus 4.5", - "slug": "anthropic/claude-opus-4.5", - "updated_at": "2025-12-05T21:53:06.931607+00:00", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "anthropic", - "context_length": 1000000, - "created_at": "2025-05-22T16:12:51.381897+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 1310720, + "created_at": "2025-04-05T19:31:59.735804+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 1000000, + "context_length": 1310720, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -69678,7 +71040,10 @@ "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -69688,7 +71053,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "5e3f0568-c089-416a-ab7b-6c432999b571", + "id": "6b6ca1d2-f0a3-4b14-afd1-9ec651371658", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -69697,69 +71062,48 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 64000, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 1000000, - "created_at": "2025-05-22T16:12:51.381897+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 10000000, + "created_at": "2025-04-05T19:31:59.735804+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%), Sonnet 4 balances capability and computational efficiency, making it suitable for a broad range of applications from routine coding tasks to complex software development projects. Key enhancements include improved autonomous codebase navigation, reduced error rates in agent-driven workflows, and increased reliability in following intricate instructions. Sonnet 4 is optimized for practical everyday use, providing advanced reasoning capabilities while maintaining efficiency and responsiveness in diverse internal and external scenarios.\n\nRead more at the [blog post here](https://www.anthropic.com/news/claude-4)", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Claude", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Sonnet 4", + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4-sonnet-20250522", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Claude Sonnet 4", - "slug": "anthropic/claude-sonnet-4", - "updated_at": "2025-12-05T21:53:41.372783+00:00", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-4-sonnet-20250522", - "model_variant_slug": "anthropic/claude-sonnet-4", + "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "model_variant_slug": "meta-llama/llama-4-scout", "moderation_required": false, - "name": "Google | anthropic/claude-4-sonnet-20250522", + "name": "Google | meta-llama/llama-4-scout-17b-16e-instruct", "pricing": { - "completion": "0.000015", + "completion": "0.0000007", "discount": 0, - "image": "0.0048", - "image_output": "0", - "input_cache_read": "0.0000003", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -69815,7 +71159,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -69825,82 +71170,66 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-sonnet-4@20250514", + "provider_model_id": "meta/llama-4-scout-17b-16e-instruct-maas", "provider_name": "Google", - "provider_region": null, + "provider_region": "us-east5", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", - "top_p", "temperature", + "top_p", + "seed", "stop", - "reasoning", - "include_reasoning", + "frequency_penalty", + "presence_penalty", + "top_k", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [ - { - "completions": "0.0000225", - "input_cache_read": "0.0000006", - "input_cache_write": "0.0000075", - "prompt": "0.000006", - "threshold": 200000, - "type": "prompt-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Claude", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Sonnet 4", + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4-sonnet-20250522", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Claude Sonnet 4", - "slug": "anthropic/claude-sonnet-4", - "updated_at": "2025-12-05T21:53:41.372783+00:00", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "anthropic", - "context_length": 1000000, - "created_at": "2025-09-29T16:01:16.552976+00:00", + "author": "minimax", + "context_length": 196608, + "created_at": "2025-10-23T20:41:33.120854+00:00", "default_parameters": { "frequency_penalty": null, "temperature": 1, - "top_p": 1 + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\n\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows. Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.", + "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { - "adapter_name": "GoogleVertexAnthropicAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 1000000, + "context_length": 196608, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -69910,8 +71239,9 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": false, + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "content-string", + "should_send_reasoning_text_in_text_content": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -69921,7 +71251,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "83c3ce47-c679-4cb1-9e21-a5defad78b61", + "id": "c80588c8-ce64-46f2-b375-19bf4504133e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -69930,68 +71260,66 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 64000, + "max_completion_tokens": 196608, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthropic", - "context_length": 1000000, - "created_at": "2025-09-29T16:01:16.552976+00:00", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-10-23T20:41:33.120854+00:00", "default_parameters": { "frequency_penalty": null, "temperature": 1, - "top_p": 1 + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with improvements across system design, code security, and specification adherence. The model is designed for extended autonomous operation, maintaining task continuity across sessions and providing fact-based progress tracking.\n\nSonnet 4.5 also introduces stronger agentic capabilities, including improved tool orchestration, speculative parallel execution, and more efficient context and memory management. With enhanced context tracking and awareness of token usage across tool calls, it is particularly well-suited for multi-context and long-running workflows. Use cases span software engineering, cybersecurity, financial analysis, research agents, and other domains requiring sustained reasoning and tool use.", + "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Claude", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "MiniMaxAI/MiniMax-M2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Sonnet 4.5", + "name": "MiniMax: MiniMax M2", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.5-sonnet-20250929", + "permaslug": "minimax/minimax-m2", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Claude Sonnet 4.5", - "slug": "anthropic/claude-sonnet-4.5", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2", + "slug": "minimax/minimax-m2", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "anthropic/claude-4.5-sonnet-20250929", - "model_variant_slug": "anthropic/claude-sonnet-4.5", + "model_variant_permaslug": "minimax/minimax-m2", + "model_variant_slug": "minimax/minimax-m2", "moderation_required": false, - "name": "Google | anthropic/claude-4.5-sonnet-20250929", + "name": "Google | minimax/minimax-m2", "pricing": { - "completion": "0.000015", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.0000003", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -70047,7 +71375,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -70057,78 +71386,82 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "claude-sonnet-4-5@20250929", + "provider_model_id": "minimaxai/minimax-m2-maas", "provider_name": "Google", - "provider_region": "us-east5", + "provider_region": null, "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", - "top_p", "temperature", + "top_p", + "seed", + "response_format", "stop", - "reasoning", - "include_reasoning", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", + "structured_outputs", "tools", - "tool_choice", - "top_k" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "completions": "0.0000225", - "input_cache_read": "0.0000006", - "input_cache_write": "0.0000075", - "prompt": "0.000006", - "threshold": 200000, - "type": "prompt-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Claude", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "MiniMaxAI/MiniMax-M2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Anthropic: Claude Sonnet 4.5", + "name": "MiniMax: MiniMax M2", "output_modalities": ["text"], - "permaslug": "anthropic/claude-4.5-sonnet-20250929", + "permaslug": "minimax/minimax-m2", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Claude Sonnet 4.5", - "slug": "anthropic/claude-sonnet-4.5", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2", + "slug": "minimax/minimax-m2", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-08-21T12:33:48+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 163840, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -70138,10 +71471,8 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -70151,7 +71482,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "26971f72-0b43-4255-a39d-98da9ca32119", + "id": "e841d362-719d-4f53-86c8-f83f5bbd7361", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -70160,59 +71491,60 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-08-21T12:33:48+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", - "model_variant_slug": "deepseek/deepseek-chat-v3.1", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "Google | deepseek/deepseek-chat-v3.1", + "name": "Google | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.0000017", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000006", + "prompt": "0.0000006" }, "provider_display_name": "Google Vertex", "provider_info": { @@ -70272,7 +71604,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -70282,25 +71615,25 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "deepseek-ai/deepseek-v3.1-maas", + "provider_model_id": "moonshotai/kimi-k2-thinking-maas", "provider_name": "Google", - "provider_region": "us-west2", + "provider_region": null, "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "seed", + "response_format", "stop", "frequency_penalty", "presence_penalty", "top_k", "repetition_penalty", + "structured_outputs", "tools", "tool_choice" ], @@ -70311,50 +71644,51 @@ "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-12-01T13:10:42.818885+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 163840, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -70364,17 +71698,21 @@ "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, + "literal_none": true, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": false, - "id": "a933135e-3dfc-49e2-b9ad-be0bcd2bcdf2", + "id": "93ec7f6d-c7aa-44c1-b9a7-2cd664a17b13", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -70383,21 +71721,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", + "author": "openai", "context_length": 131072, - "created_at": "2025-12-01T13:10:42.818885+00:00", + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -70406,41 +71744,37 @@ "system_prompt": null } }, - "group": "DeepSeek", + "group": "GPT", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", - "model_variant_slug": "deepseek/deepseek-v3.2", + "model_variant_permaslug": "openai/gpt-oss-120b", + "model_variant_slug": "openai/gpt-oss-120b", "moderation_required": false, - "name": "Google | deepseek/deepseek-v3.2-20251201", + "name": "Google | openai/gpt-oss-120b", "pricing": { - "completion": "0.00000168", + "completion": "0.00000036", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000056", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000009", + "prompt": "0.00000009" }, "provider_display_name": "Google Vertex", "provider_info": { @@ -70500,7 +71834,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -70510,31 +71845,29 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "deepseek-ai/deepseek-v3.2-maas", + "provider_model_id": "openai/gpt-oss-120b-maas", "provider_name": "Google", - "provider_region": null, + "provider_region": "global", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "response_format", "stop", "frequency_penalty", "presence_penalty", - "top_k", "repetition_penalty", - "tools", - "tool_choice", - "structured_outputs" + "top_k" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -70546,32 +71879,32 @@ "system_prompt": null } }, - "group": "DeepSeek", + "group": "GPT", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1000000, - "created_at": "2025-02-05T15:30:13.144552+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -70579,11 +71912,11 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 1000000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -70593,10 +71926,11 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_base64_video_input": true, - "supports_file_urls": true, - "supports_input_audio": true, + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -70606,7 +71940,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "65001dcf-7a48-4a35-9405-f419233ad7fc", + "id": "d14eeefb-d9fc-4732-b0d7-db609aad8308", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -70615,13 +71949,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1000000, - "created_at": "2025-02-05T15:30:13.144552+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -70629,55 +71963,52 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", - "name": "Google: Gemini 2.0 Flash", + "model_version_group_id": null, + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "google/gemini-2.0-flash-001", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.0 Flash", - "slug": "google/gemini-2.0-flash-001", - "updated_at": "2025-11-14T23:34:05.685679+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.0-flash-001", - "model_variant_slug": "google/gemini-2.0-flash-001", + "model_variant_permaslug": "openai/gpt-oss-20b", + "model_variant_slug": "openai/gpt-oss-20b", "moderation_required": false, - "name": "Google | google/gemini-2.0-flash-001", + "name": "Google | openai/gpt-oss-20b", "pricing": { - "completion": "0.0000006", + "completion": "0.00000025", "discount": 0, - "image": "0.0000387", - "image_output": "0", - "input_cache_read": "0.0000000375", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000007", + "prompt": "0.00000007" }, "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -70733,7 +72064,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -70743,74 +72075,76 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "gemini-2.0-flash-001", + "provider_model_id": "openai/gpt-oss-20b-maas", "provider_name": "Google", - "provider_region": null, + "provider_region": "us-central1", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "response_format", "stop", - "structured_outputs", - "tools", - "tool_choice" + "frequency_penalty", + "presence_penalty", + "repetition_penalty", + "top_k" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", - "name": "Google: Gemini 2.0 Flash", + "model_version_group_id": null, + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "google/gemini-2.0-flash-001", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.0 Flash", - "slug": "google/gemini-2.0-flash-001", - "updated_at": "2025-11-14T23:34:05.685679+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-02-25T17:56:52.206054+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 1048576, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -70820,10 +72154,10 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_base64_video_input": true, - "supports_file_urls": true, - "supports_input_audio": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -70833,7 +72167,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "4dc03b46-4015-495b-acff-1dcacb6c3311", + "id": "5369c603-15de-42f2-95d3-1e9559206ff9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -70842,68 +72176,58 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-02-25T17:56:52.206054+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", - "name": "Google: Gemini 2.0 Flash Lite", + "model_version_group_id": null, + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "google/gemini-2.0-flash-lite-001", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.0 Flash Lite", - "slug": "google/gemini-2.0-flash-lite-001", - "updated_at": "2025-11-14T23:32:47.563595+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.0-flash-lite-001", - "model_variant_slug": "google/gemini-2.0-flash-lite-001", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "Google | google/gemini-2.0-flash-lite-001", + "name": "Google | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.0000003", + "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000075", - "request": "0", - "web_search": "0" + "prompt": "0.00000022" }, "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -70959,7 +72283,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -70969,19 +72294,23 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "gemini-2.0-flash-lite-001", + "provider_model_id": "qwen/qwen3-235b-a22b-instruct-2507-maas", "provider_name": "Google", - "provider_region": null, + "provider_region": "us-south1", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "response_format", "stop", - "structured_outputs", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", "tools", "tool_choice" ], @@ -70992,51 +72321,46 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", - "name": "Google: Gemini 2.0 Flash Lite", + "model_version_group_id": null, + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "google/gemini-2.0-flash-lite-001", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.0 Flash Lite", - "slug": "google/gemini-2.0-flash-lite-001", - "updated_at": "2025-11-14T23:32:47.563595+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-06-17T15:01:28.103313+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-23T00:29:06+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 1048576, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -71050,9 +72374,6 @@ "response_format": true, "structured_outputs": true }, - "supports_base64_video_input": true, - "supports_file_urls": true, - "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -71062,78 +72383,68 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "b9b095f0-6448-46bb-8e4c-0111ba2bf1b0", + "id": "803d4906-0d0b-49b6-8705-7ae0a4d45217", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 80, "limit_rpm_cf": null, - "max_completion_tokens": 65535, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", + "author": "qwen", "context_length": 1048576, - "created_at": "2025-06-17T15:01:28.103313+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "created_at": "2025-07-23T00:29:06+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash", - "slug": "google/gemini-2.5-flash", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash", - "model_variant_slug": "google/gemini-2.5-flash", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "model_variant_slug": "qwen/qwen3-coder", "moderation_required": false, - "name": "Google | google/gemini-2.5-flash", + "name": "Google | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { - "completion": "0.0000025", + "completion": "0.0000018", "discount": 0, - "image": "0.001238", - "image_output": "0", - "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000022", + "prompt": "0.00000022" }, - "provider_display_name": "Google Vertex (Global)", + "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -71144,7 +72455,7 @@ "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex (Global)", + "displayName": "Google Vertex", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, @@ -71189,24 +72500,23 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, "name": "Google", "owners": ["{}"], - "slug": "google-vertex/global", + "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "gemini-2.5-flash", + "provider_model_id": "qwen/qwen3-coder-480b-a35b-instruct-maas", "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex/global", + "provider_region": "us-south1", + "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "structured_outputs", "response_format", "max_tokens", @@ -71214,61 +72524,60 @@ "top_p", "seed", "stop", + "frequency_penalty", + "presence_penalty", + "repetition_penalty", + "top_k", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash", - "slug": "google/gemini-2.5-flash", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 32768, - "created_at": "2025-10-07T20:53:51+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 32768, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -71278,8 +72587,10 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -71289,30 +72600,26 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "9fc81a95-52d2-4a94-adc5-f99869ab1351", + "id": "f65277dd-2a8c-4105-a8a1-f47c6d813131", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 1000, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 32768, - "created_at": "2025-10-07T20:53:51+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "features": { "reasoning_config": { "end_token": null, @@ -71320,45 +72627,40 @@ "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-2.5-flash-image", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "output_modalities": ["text"], + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Image (Nano Banana)", - "slug": "google/gemini-2.5-flash-image", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-image", - "model_variant_slug": "google/gemini-2.5-flash-image", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", "moderation_required": false, - "name": "Google | google/gemini-2.5-flash-image", + "name": "Google | qwen/qwen3-next-80b-a3b-instruct-2509", "pricing": { - "completion": "0.0000025", + "completion": "0.0000012", "discount": 0, - "image": "0.001238", - "image_output": "0.00003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -71414,7 +72716,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -71424,22 +72727,29 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "gemini-2.5-flash-image", + "provider_model_id": "qwen/qwen3-next-80b-a3b-instruct-maas", "provider_name": "Google", "provider_region": "global", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "response_format", - "structured_outputs" + "stop", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, @@ -71450,32 +72760,32 @@ "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-2.5-flash-image", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "output_modalities": ["text"], + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Image (Nano Banana)", - "slug": "google/gemini-2.5-flash-image", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 32768, - "created_at": "2025-08-26T14:36:17+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:38:04.192907+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -71483,11 +72793,11 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Image Preview, a.k.a. \"Nano Banana,\" is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 32768, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -71497,8 +72807,11 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": false, + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -71508,22 +72821,22 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "f73f3dbc-df0e-4e0d-b951-20c77d3b3b54", + "id": "d1deb834-ad6a-43c7-a877-3107eb9ac03f", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 1000, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 32768, - "created_at": "2025-08-26T14:36:17+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:38:04.192907+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -71531,53 +72844,49 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Image Preview, a.k.a. \"Nano Banana,\" is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Image Preview (Nano Banana)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-2.5-flash-image-preview", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "output_modalities": ["text"], + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Image Preview (Nano Banana)", - "slug": "google/gemini-2.5-flash-image-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-image-preview", - "model_variant_slug": "google/gemini-2.5-flash-image-preview", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", "moderation_required": false, - "name": "Google | google/gemini-2.5-flash-image-preview", + "name": "Google | qwen/qwen3-next-80b-a3b-thinking-2509", "pricing": { - "completion": "0.0000025", + "completion": "0.0000012", "discount": 0, - "image": "0.001238", - "image_output": "0.00003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -71633,7 +72942,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -71643,70 +72953,80 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "gemini-2.5-flash-image-preview", + "provider_model_id": "qwen/qwen3-next-80b-a3b-thinking-maas", "provider_name": "Google", "provider_region": "global", "provider_slug": "google-vertex", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "response_format", - "structured_outputs" + "stop", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Image Preview (Nano Banana)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-2.5-flash-image-preview", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "output_modalities": ["text"], + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Image Preview (Nano Banana)", - "slug": "google/gemini-2.5-flash-image-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-07-22T16:04:36.283638+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "VertexOpenAIAdapter", "can_abort": false, - "context_length": 1048576, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", @@ -71716,13 +73036,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_base64_video_input": true, - "supports_file_urls": true, - "supports_input_audio": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -71732,7 +73046,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "ebdbef25-737d-4ac3-9e98-fd3928724e45", + "id": "f1bbf654-df93-4228-a30d-a44b5e6b98f0", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -71741,69 +73055,63 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65535, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-07-22T16:04:36.283638+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Lite", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-lite", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Lite", - "slug": "google/gemini-2.5-flash-lite", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-lite", - "model_variant_slug": "google/gemini-2.5-flash-lite", + "model_variant_permaslug": "z-ai/glm-4.7-20251222", + "model_variant_slug": "z-ai/glm-4.7", "moderation_required": false, - "name": "Google | google/gemini-2.5-flash-lite", + "name": "Google | z-ai/glm-4.7-20251222", "pricing": { - "completion": "0.0000004", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000001", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Google Vertex", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", + "adapterName": "VertexOpenAIAdapter", "baseUrl": "not_used", "byokEnabled": true, "dataPolicy": { @@ -71859,7 +73167,8 @@ "minimax/minimax-m2-maas", "gemini-3-pro-preview", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "claude-opus-4-5@20251101", + "gemini-3-flash-preview" ], "isAbortable": false, "isMultipartSupported": true, @@ -71869,7 +73178,7 @@ "slug": "google-vertex", "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" }, - "provider_model_id": "gemini-2.5-flash-lite", + "provider_model_id": "zai-org/glm-4.7-maas", "provider_name": "Google", "provider_region": null, "provider_slug": "google-vertex", @@ -71877,13 +73186,17 @@ "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "seed", + "response_format", "stop", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", + "structured_outputs", "tools", "tool_choice" ], @@ -71896,37 +73209,53 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Lite", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-lite", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Lite", - "slug": "google/gemini-2.5-flash-lite", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null - }, + } + ], + "name": "Google", + "slug": "google-vertex" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "displayName": "Google AI Studio", + "headquarters": "US", + "icon": { + "url": "/images/icons/GoogleAIStudio.svg" + }, + "models": [ { "author": "google", "context_length": 1048576, - "created_at": "2025-09-25T17:01:26.198818+00:00", + "created_at": "2025-02-05T15:30:13.144552+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -71934,26 +73263,20 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_base64_video_input": true, - "supports_file_urls": true, "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, @@ -71964,7 +73287,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "6e63b201-b2cc-45d9-b0c0-da687a96efa0", + "id": "8b6c3ec6-e6a0-43f7-9e09-a5487a5756c9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -71973,13 +73296,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65535, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "google", - "context_length": 1048576, - "created_at": "2025-09-25T17:01:26.198818+00:00", + "context_length": 1000000, + "created_at": "2025-02-05T15:30:13.144552+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -71987,7 +73310,7 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", + "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -72003,123 +73326,126 @@ "hidden": false, "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", + "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", + "name": "Google: Gemini 2.0 Flash", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", + "permaslug": "google/gemini-2.0-flash-001", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Lite Preview 09-2025", - "slug": "google/gemini-2.5-flash-lite-preview-09-2025", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 2.0 Flash", + "slug": "google/gemini-2.0-flash-001", + "updated_at": "2025-11-14T23:34:05.685679+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", - "model_variant_slug": "google/gemini-2.5-flash-lite-preview-09-2025", + "model_variant_permaslug": "google/gemini-2.0-flash-001", + "model_variant_slug": "google/gemini-2.0-flash-001", "moderation_required": false, - "name": "Google | google/gemini-2.5-flash-lite-preview-09-2025", + "name": "Google AI Studio | google/gemini-2.0-flash-001", "pricing": { "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "image": "0.0000001", + "input_cache_read": "0.000000025", + "internal_reasoning": "0.0000004", + "prompt": "0.0000001" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "gemini-2.5-flash-lite-preview-09-2025", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex", + "provider_model_id": "gemini-2.0-flash-001", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "seed", + "response_format", "stop", + "structured_outputs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -72139,25 +73465,25 @@ "hidden": false, "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", + "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", + "name": "Google: Gemini 2.0 Flash", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", + "permaslug": "google/gemini-2.0-flash-001", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Lite Preview 09-2025", - "slug": "google/gemini-2.5-flash-lite-preview-09-2025", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 2.0 Flash", + "slug": "google/gemini-2.0-flash-001", + "updated_at": "2025-11-14T23:34:05.685679+00:00", "warning_message": null }, { "author": "google", "context_length": 1048576, - "created_at": "2025-09-25T17:09:38.646963+00:00", + "created_at": "2025-02-25T17:56:52.206054+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -72165,53 +73491,46 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", + "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_base64_video_input": true, - "supports_file_urls": true, "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": true + } }, "has_chat_completions": true, "has_completions": false, - "id": "3135cae6-b7a4-49be-9994-12aebe3caf27", + "id": "cae2e26e-549c-494f-b613-99783f016f8b", "is_byok": false, - "is_deranked": false, + "is_deranked": true, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65535, - "max_prompt_tokens": null, + "max_completion_tokens": 8192, + "max_prompt_tokens": 1048576, "max_tokens_per_image": null, "model": { "author": "google", "context_length": 1048576, - "created_at": "2025-09-25T17:09:38.646963+00:00", + "created_at": "2025-02-25T17:56:52.206054+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -72219,7 +73538,7 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", + "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -72233,126 +73552,127 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "file", "text", "audio", "video"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Preview 09-2025", + "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", + "name": "Google: Gemini 2.0 Flash Lite", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-preview-09-2025", + "permaslug": "google/gemini-2.0-flash-lite-001", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Preview 09-2025", - "slug": "google/gemini-2.5-flash-preview-09-2025", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 2.0 Flash Lite", + "slug": "google/gemini-2.0-flash-lite-001", + "updated_at": "2025-11-14T23:32:47.563595+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-preview-09-2025", - "model_variant_slug": "google/gemini-2.5-flash-preview-09-2025", + "model_variant_permaslug": "google/gemini-2.0-flash-lite-001", + "model_variant_slug": "google/gemini-2.0-flash-lite-001", "moderation_required": false, - "name": "Google | google/gemini-2.5-flash-preview-09-2025", + "name": "Google AI Studio | google/gemini-2.0-flash-lite-001", "pricing": { - "completion": "0.0000025", + "completion": "0.0000003", "discount": 0, - "image": "0.001238", - "image_output": "0", - "input_cache_read": "0.000000075", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "image": "0.000000075", + "internal_reasoning": "0.0000003", + "prompt": "0.000000075" }, - "provider_display_name": "Google Vertex (Global)", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex (Global)", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex/global", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "gemini-2.5-flash-preview-09-2025", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex/global", + "provider_model_id": "gemini-2.0-flash-lite-001", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "seed", + "response_format", "stop", + "structured_outputs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -72370,27 +73690,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "file", "text", "audio", "video"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Preview 09-2025", + "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", + "name": "Google: Gemini 2.0 Flash Lite", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-preview-09-2025", + "permaslug": "google/gemini-2.0-flash-lite-001", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Preview 09-2025", - "slug": "google/gemini-2.5-flash-preview-09-2025", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 2.0 Flash Lite", + "slug": "google/gemini-2.0-flash-lite-001", + "updated_at": "2025-11-14T23:32:47.563595+00:00", "warning_message": null }, { "author": "google", "context_length": 1048576, - "created_at": "2025-06-17T14:12:24+00:00", + "created_at": "2025-06-17T15:01:28.103313+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -72398,53 +73718,51 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", + "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true }, "supports_base64_video_input": true, - "supports_file_urls": true, "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": true }, "has_chat_completions": true, "has_completions": false, - "id": "43106252-bd69-498f-84f5-1fde678783f7", + "id": "16c01e26-9b91-4d22-b487-ed35634b017d", "is_byok": false, - "is_deranked": false, + "is_deranked": true, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 300, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 65535, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "google", "context_length": 1048576, - "created_at": "2025-06-17T14:12:24+00:00", + "created_at": "2025-06-17T15:01:28.103313+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -72452,11 +73770,9 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", + "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -72468,110 +73784,114 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["file", "image", "text", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Pro", + "name": "Google: Gemini 2.5 Flash", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-pro", + "permaslug": "google/gemini-2.5-flash", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Pro", - "slug": "google/gemini-2.5-pro", - "updated_at": "2026-01-08T23:55:54.79011+00:00", + "short_name": "Gemini 2.5 Flash", + "slug": "google/gemini-2.5-flash", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-pro", - "model_variant_slug": "google/gemini-2.5-pro", + "model_variant_permaslug": "google/gemini-2.5-flash", + "model_variant_slug": "google/gemini-2.5-flash", "moderation_required": false, - "name": "Google | google/gemini-2.5-pro", + "name": "Google AI Studio | google/gemini-2.5-flash", "pricing": { - "completion": "0.00001", + "completion": "0.0000025", "discount": 0, - "image": "0.00516", - "image_output": "0", - "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "image": "0.0000003", + "input_cache_read": "0.00000003", + "internal_reasoning": "0.0000025", + "prompt": "0.0000003" }, - "provider_display_name": "Google Vertex (Global)", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex (Global)", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex/global", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "gemini-2.5-pro", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex/global", + "provider_model_id": "gemini-2.5-flash", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ "reasoning", @@ -72589,22 +73909,11 @@ "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "completions": "0.000015", - "input_cache_read": "0.00000025", - "input_cache_write": "0.000002875", - "prompt": "0.0000025", - "threshold": 200000, - "type": "prompt-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -72616,27 +73925,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["file", "image", "text", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Pro", + "name": "Google: Gemini 2.5 Flash", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-pro", + "permaslug": "google/gemini-2.5-flash", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Pro", - "slug": "google/gemini-2.5-pro", - "updated_at": "2026-01-08T23:55:54.79011+00:00", + "short_name": "Gemini 2.5 Flash", + "slug": "google/gemini-2.5-flash", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "google", - "context_length": 1048576, - "created_at": "2025-05-07T00:41:53+00:00", + "context_length": 32768, + "created_at": "2025-10-07T20:53:51+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -72644,26 +73953,21 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", + "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 1048576, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_base64_video_input": true, + "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -72673,22 +73977,22 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "9d2cac4d-81d4-4e67-ac7a-6c73040655ee", + "id": "2341e1bc-99ea-46ab-ae2d-9a83846afb43", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 1000, "limit_rpm_cf": null, - "max_completion_tokens": 65535, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "google", - "context_length": 1048576, - "created_at": "2025-05-07T00:41:53+00:00", + "context_length": 32768, + "created_at": "2025-10-07T20:53:51+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -72696,9 +74000,8 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", + "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -72710,141 +74013,131 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Pro Preview 05-06", - "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-pro-preview-03-25", + "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", + "output_modalities": ["image", "text"], + "permaslug": "google/gemini-2.5-flash-image", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Pro Preview 05-06", - "slug": "google/gemini-2.5-pro-preview-05-06", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash Image (Nano Banana)", + "slug": "google/gemini-2.5-flash-image", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-pro-preview-03-25", - "model_variant_slug": "google/gemini-2.5-pro-preview-05-06", + "model_variant_permaslug": "google/gemini-2.5-flash-image", + "model_variant_slug": "google/gemini-2.5-flash-image", "moderation_required": false, - "name": "Google | google/gemini-2.5-pro-preview-03-25", + "name": "Google AI Studio | google/gemini-2.5-flash-image", "pricing": { - "completion": "0.00001", + "completion": "0.0000025", "discount": 0, - "image": "0.00516", - "image_output": "0", - "input_cache_read": "0.00000031", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "image": "0.0000003", + "image_output": "0.00003", + "input_cache_read": "0.00000003", + "internal_reasoning": "0.0000025", + "prompt": "0.0000003" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "gemini-2.5-pro-preview-05-06", - "provider_name": "Google", + "provider_model_id": "gemini-2.5-flash-image", + "provider_name": "Google AI Studio", "provider_region": null, - "provider_slug": "google-vertex", + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "seed", - "stop", - "tools", - "tool_choice" + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, - "variable_pricings": [ - { - "completions": "0.000015", - "input_cache_read": "0.000000625", - "input_cache_write": "0.000002875", - "prompt": "0.0000025", - "threshold": 200000, - "type": "prompt-threshold" - } - ], + "supports_reasoning": false, + "supports_tool_parameters": false, + "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -72856,80 +74149,89 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Pro Preview 05-06", - "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-pro-preview-03-25", + "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", + "output_modalities": ["image", "text"], + "permaslug": "google/gemini-2.5-flash-image", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Pro Preview 05-06", - "slug": "google/gemini-2.5-pro-preview-05-06", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash Image (Nano Banana)", + "slug": "google/gemini-2.5-flash-image", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "google", "context_length": 1048576, - "created_at": "2025-06-05T15:27:37.538116+00:00", - "default_parameters": {}, + "created_at": "2025-07-22T16:04:36.283638+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.\n", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true }, - "supports_file_urls": true, + "supports_base64_video_input": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": true }, "has_chat_completions": true, "has_completions": false, - "id": "018040ae-9cda-43cd-8813-d8ca83f6c7ed", + "id": "ce839073-aa24-4f29-8358-15b319bd05ec", "is_byok": false, - "is_deranked": false, + "is_deranked": true, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 65535, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "google", "context_length": 1048576, - "created_at": "2025-06-05T15:27:37.538116+00:00", - "default_parameters": {}, + "created_at": "2025-07-22T16:04:36.283638+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.\n", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -72941,110 +74243,114 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text", "audio"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Pro Preview 06-05", + "name": "Google: Gemini 2.5 Flash Lite", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-pro-preview-06-05", + "permaslug": "google/gemini-2.5-flash-lite", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Pro Preview 06-05", - "slug": "google/gemini-2.5-pro-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash Lite", + "slug": "google/gemini-2.5-flash-lite", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-pro-preview-06-05", - "model_variant_slug": "google/gemini-2.5-pro-preview", + "model_variant_permaslug": "google/gemini-2.5-flash-lite", + "model_variant_slug": "google/gemini-2.5-flash-lite", "moderation_required": false, - "name": "Google | google/gemini-2.5-pro-preview-06-05", + "name": "Google AI Studio | google/gemini-2.5-flash-lite", "pricing": { - "completion": "0.00001", + "completion": "0.0000004", "discount": 0, - "image": "0.00516", - "image_output": "0", - "input_cache_read": "0.00000031", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "image": "0.0000001", + "input_cache_read": "0.00000001", + "internal_reasoning": "0.0000004", + "prompt": "0.0000001" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "gemini-2.5-pro-preview-06-05", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex", + "provider_model_id": "gemini-2.5-flash-lite", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ "reasoning", @@ -73062,19 +74368,11 @@ "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "completions": "0.000015", - "input_cache_read": "0.000000625", - "input_cache_write": "0.000002875", - "prompt": "0.0000025", - "threshold": 200000, - "type": "prompt-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -73086,27 +74384,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text", "audio"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Pro Preview 06-05", + "name": "Google: Gemini 2.5 Flash Lite", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-pro-preview-06-05", + "permaslug": "google/gemini-2.5-flash-lite", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Pro Preview 06-05", - "slug": "google/gemini-2.5-pro-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash Lite", + "slug": "google/gemini-2.5-flash-lite", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "google", "context_length": 1048576, - "created_at": "2025-12-17T15:57:58+00:00", + "created_at": "2025-09-25T17:01:26.198818+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -73114,23 +74412,25 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.\n\nThe model supports a 1M token context window and multimodal inputs including text, images, audio, video, and PDFs, with text output. It includes configurable reasoning via thinking levels (minimal, low, medium, high), structured output, tool use, and automatic context caching. Gemini 3 Flash Preview is optimized for users who want strong reasoning and agentic behavior without the cost or latency of full scale frontier models.", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_base64_video_input": true, "supports_file_urls": true, - "supports_implicit_caching": true, "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, @@ -73142,7 +74442,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "d06bdb18-3cc2-46b0-bf23-922e485dc255", + "id": "fe48694e-e3fb-43a0-98fd-ca364053a9de", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -73151,13 +74451,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65535, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "google", "context_length": 1048576, - "created_at": "2025-12-17T15:57:58+00:00", + "created_at": "2025-09-25T17:01:26.198818+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -73165,11 +74465,9 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.\n\nThe model supports a 1M token context window and multimodal inputs including text, images, audio, video, and PDFs, with text output. It includes configurable reasoning via thinking levels (minimal, low, medium, high), structured output, tool use, and automatic context caching. Gemini 3 Flash Preview is optimized for users who want strong reasoning and agentic behavior without the cost or latency of full scale frontier models.", + "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -73184,120 +74482,124 @@ "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 3 Flash Preview", + "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", "output_modalities": ["text"], - "permaslug": "google/gemini-3-flash-preview-20251217", + "permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 3 Flash Preview", - "slug": "google/gemini-3-flash-preview", - "updated_at": "2025-12-17T16:17:44.159277+00:00", + "short_name": "Gemini 2.5 Flash Lite Preview 09-2025", + "slug": "google/gemini-2.5-flash-lite-preview-09-2025", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-3-flash-preview-20251217", - "model_variant_slug": "google/gemini-3-flash-preview", + "model_variant_permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", + "model_variant_slug": "google/gemini-2.5-flash-lite-preview-09-2025", "moderation_required": false, - "name": "Google | google/gemini-3-flash-preview-20251217", + "name": "Google AI Studio | google/gemini-2.5-flash-lite-preview-09-2025", "pricing": { - "completion": "0.000003", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000005", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "image": "0.0000001", + "input_cache_read": "0.00000001", + "internal_reasoning": "0.0000004", + "prompt": "0.0000001" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "gemini-3-flash-preview", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex", + "provider_model_id": "gemini-2.5-flash-lite-preview-09-2025", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "response_format", "stop", - "structured_outputs", - "tool_choice", - "tools" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, @@ -73306,9 +74608,7 @@ "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -73323,24 +74623,24 @@ "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 3 Flash Preview", + "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", "output_modalities": ["text"], - "permaslug": "google/gemini-3-flash-preview-20251217", + "permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 3 Flash Preview", - "slug": "google/gemini-3-flash-preview", - "updated_at": "2025-12-17T16:17:44.159277+00:00", + "short_name": "Gemini 2.5 Flash Lite Preview 09-2025", + "slug": "google/gemini-2.5-flash-lite-preview-09-2025", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "google", "context_length": 1048576, - "created_at": "2025-11-18T14:04:28+00:00", + "created_at": "2025-09-25T17:09:38.646963+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -73348,27 +74648,27 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", + "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true }, "supports_base64_video_input": true, "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -73379,7 +74679,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "ca4e1f52-1f15-4258-a4eb-d6b56c0c4057", + "id": "ab314496-abd4-4392-8e64-f021533bf6bf", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -73394,7 +74694,7 @@ "model": { "author": "google", "context_length": 1048576, - "created_at": "2025-11-18T14:04:28+00:00", + "created_at": "2025-09-25T17:09:38.646963+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -73402,11 +74702,9 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", + "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -73418,110 +74716,114 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["image", "file", "text", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 3 Pro Preview", + "name": "Google: Gemini 2.5 Flash Preview 09-2025", "output_modalities": ["text"], - "permaslug": "google/gemini-3-pro-preview-20251117", + "permaslug": "google/gemini-2.5-flash-preview-09-2025", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 3 Pro Preview", - "slug": "google/gemini-3-pro-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash Preview 09-2025", + "slug": "google/gemini-2.5-flash-preview-09-2025", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-3-pro-preview-20251117", - "model_variant_slug": "google/gemini-3-pro-preview", + "model_variant_permaslug": "google/gemini-2.5-flash-preview-09-2025", + "model_variant_slug": "google/gemini-2.5-flash-preview-09-2025", "moderation_required": false, - "name": "Google | google/gemini-3-pro-preview-20251117", + "name": "Google AI Studio | google/gemini-2.5-flash-preview-09-2025", "pricing": { - "completion": "0.000012", + "completion": "0.0000025", "discount": 0, - "image": "0.008256", - "image_output": "0", - "input_cache_read": "0.0000002", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0" + "image": "0.0000003", + "input_cache_read": "0.00000003", + "internal_reasoning": "0.0000025", + "prompt": "0.0000003" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "gemini-3-pro-preview", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex", + "provider_model_id": "gemini-2.5-flash-preview-09-2025", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ "reasoning", @@ -73539,22 +74841,11 @@ "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "completions": "0.000018", - "input_cache_read": "0.0000004", - "input_cache_write": "0.000004375", - "prompt": "0.000004", - "threshold": 200000, - "type": "prompt-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -73566,27 +74857,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["image", "file", "text", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 3 Pro Preview", + "name": "Google: Gemini 2.5 Flash Preview 09-2025", "output_modalities": ["text"], - "permaslug": "google/gemini-3-pro-preview-20251117", + "permaslug": "google/gemini-2.5-flash-preview-09-2025", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 3 Pro Preview", - "slug": "google/gemini-3-pro-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Flash Preview 09-2025", + "slug": "google/gemini-2.5-flash-preview-09-2025", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "google", - "context_length": 65536, - "created_at": "2025-11-20T15:49:57.064095+00:00", + "context_length": 1048576, + "created_at": "2025-06-17T14:12:24+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -73594,47 +74885,52 @@ }, "default_stops": [], "default_system": null, - "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 65536, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { "is_mandatory_reasoning": true, - "supports_input_audio": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_base64_video_input": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": true }, "has_chat_completions": true, "has_completions": false, - "id": "be4c8ae2-b6e5-4e54-b556-733445e52f1e", + "id": "b261d15b-95ce-4c8c-8bce-00eb0dc1ff77", "is_byok": false, - "is_deranked": false, + "is_deranked": true, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 250, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "google", - "context_length": 65536, - "created_at": "2025-11-20T15:49:57.064095+00:00", + "context_length": 1048576, + "created_at": "2025-06-17T14:12:24+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -73642,7 +74938,7 @@ }, "default_stops": [], "default_system": null, - "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", + "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", "features": { "chat_template_config": { "should_hoist_and_merge_system_messages": null @@ -73658,125 +74954,141 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-3-pro-image-preview-20251120", + "name": "Google: Gemini 2.5 Pro", + "output_modalities": ["text"], + "permaslug": "google/gemini-2.5-pro", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Nano Banana Pro (Gemini 3 Pro Image Preview)", - "slug": "google/gemini-3-pro-image-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Pro", + "slug": "google/gemini-2.5-pro", + "updated_at": "2026-01-08T23:55:54.79011+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-3-pro-image-preview-20251120", - "model_variant_slug": "google/gemini-3-pro-image-preview", + "model_variant_permaslug": "google/gemini-2.5-pro", + "model_variant_slug": "google/gemini-2.5-pro", "moderation_required": false, - "name": "Google | google/gemini-3-pro-image-preview-20251120", + "name": "Google AI Studio | google/gemini-2.5-pro", "pricing": { - "completion": "0.000012", + "completion": "0.00001", "discount": 0, - "image": "0.067", - "image_output": "0.00012", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0" + "image": "0.00000125", + "input_cache_read": "0.000000125", + "internal_reasoning": "0.00001", + "prompt": "0.00000125" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "gemini-3-pro-image-preview", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex", + "provider_model_id": "gemini-2.5-pro", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "response_format", "stop", - "structured_outputs" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, - "variable_pricings": [], + "supports_tool_parameters": true, + "variable_pricings": [ + { + "completions": "0.000015", + "input_cache_read": "0.00000025", + "input_cache_write": "0.00000075", + "prompt": "0.0000025", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, "features": { @@ -73794,442 +75106,535 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image", "file", "audio", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-3-pro-image-preview-20251120", + "name": "Google: Gemini 2.5 Pro", + "output_modalities": ["text"], + "permaslug": "google/gemini-2.5-pro", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Nano Banana Pro (Gemini 3 Pro Image Preview)", - "slug": "google/gemini-3-pro-image-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemini 2.5 Pro", + "slug": "google/gemini-2.5-pro", + "updated_at": "2026-01-08T23:55:54.79011+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 128000, - "created_at": "2024-07-23T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "google", + "context_length": 1048576, + "created_at": "2025-12-17T15:57:58+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.\n\nThe model supports a 1M token context window and multimodal inputs including text, images, audio, video, and PDFs, with text output. It includes configurable reasoning via thinking levels (minimal, low, medium, high), structured output, tool use, and automatic context caching. Gemini 3 Flash Preview is optimized for users who want strong reasoning and agentic behavior without the cost or latency of full scale frontier models.", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 128000, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supports_base64_video_input": true, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": true }, "has_chat_completions": true, "has_completions": false, - "id": "a8eea0ee-a665-45dd-8e0a-b8f6cb8edf43", + "id": "6e82f652-2693-4b60-bbe0-7362d6e345f0", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 450, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "google", + "context_length": 1048576, + "created_at": "2025-12-17T15:57:58+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.\n\nThe model supports a 1M token context window and multimodal inputs including text, images, audio, video, and PDFs, with text output. It includes configurable reasoning via thinking levels (minimal, low, medium, high), structured output, tool use, and automatic context caching. Gemini 3 Flash Preview is optimized for users who want strong reasoning and agentic behavior without the cost or latency of full scale frontier models.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Gemini", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", - "name": "Meta: Llama 3.1 405B Instruct", + "input_modalities": ["text", "image", "file", "audio", "video"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Google: Gemini 3 Flash Preview", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-405b-instruct", - "reasoning_config": null, + "permaslug": "google/gemini-3-flash-preview-20251217", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 405B Instruct", - "slug": "meta-llama/llama-3.1-405b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 3 Flash Preview", + "slug": "google/gemini-3-flash-preview", + "updated_at": "2025-12-17T16:17:44.159277+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-405b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-405b-instruct", + "model_variant_permaslug": "google/gemini-3-flash-preview-20251217", + "model_variant_slug": "google/gemini-3-flash-preview", "moderation_required": false, - "name": "Google | meta-llama/llama-3.1-405b-instruct", + "name": "Google AI Studio | google/gemini-3-flash-preview-20251217", "pricing": { - "completion": "0.000016", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000005", - "request": "0", - "web_search": "0" + "image": "0.0000005", + "input_cache_read": "0.00000005", + "internal_reasoning": "0.000003", + "prompt": "0.0000005" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "meta/llama-3.1-405b-instruct-maas", - "provider_name": "Google", - "provider_region": "us-central1", - "provider_slug": "google-vertex", + "provider_model_id": "gemini-3-flash-preview", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "seed", + "response_format", "stop", - "frequency_penalty", - "presence_penalty", - "top_k", + "structured_outputs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Gemini", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", - "name": "Meta: Llama 3.1 405B Instruct", + "input_modalities": ["text", "image", "file", "audio", "video"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Google: Gemini 3 Flash Preview", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-405b-instruct", - "reasoning_config": null, + "permaslug": "google/gemini-3-flash-preview-20251217", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 405B Instruct", - "slug": "meta-llama/llama-3.1-405b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 3 Flash Preview", + "slug": "google/gemini-3-flash-preview", + "updated_at": "2025-12-17T16:17:44.159277+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 128000, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "google", + "context_length": 1048576, + "created_at": "2025-11-18T14:04:28+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", "endpoint": { - "adapter_name": "GoogleVertexGeminiAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 128000, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { + "is_mandatory_reasoning": true, "supported_parameters": { - "response_format": false, - "structured_outputs": false + "response_format": true, + "structured_outputs": true }, + "supports_base64_video_input": true, + "supports_file_urls": true, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": true }, "has_chat_completions": true, "has_completions": false, - "id": "4b49bf33-fc17-4e36-97ec-a163b306d8d9", + "id": "c9badacd-c552-40a2-86e6-4b4e8dd4318f", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 300, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "google", + "context_length": 1048576, + "created_at": "2025-11-18T14:04:28+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", - "features": {}, - "group": "Llama3", + "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Gemini", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "input_modalities": ["text", "image", "file", "audio", "video"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Google: Gemini 3 Pro Preview", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "google/gemini-3-pro-preview-20251117", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 3 Pro Preview", + "slug": "google/gemini-3-pro-preview", + "updated_at": "2026-01-27T15:11:59.578671+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "google/gemini-3-pro-preview-20251117", + "model_variant_slug": "google/gemini-3-pro-preview", "moderation_required": false, - "name": "Google | meta-llama/llama-3.3-70b-instruct", + "name": "Google AI Studio | google/gemini-3-pro-preview-20251117", "pricing": { - "completion": "0.00000072", + "completion": "0.000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000072", - "request": "0", - "web_search": "0" + "image": "0.000002", + "input_cache_read": "0.0000002", + "internal_reasoning": "0.000012", + "prompt": "0.000002" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "GoogleVertexGeminiAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "llama-3.3-70b-instruct-maas", - "provider_name": "Google", + "provider_model_id": "gemini-3-pro-preview", + "provider_name": "Google AI Studio", "provider_region": null, - "provider_slug": "google-vertex", + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "seed", - "response_format", "stop", - "frequency_penalty", - "presence_penalty" + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, - "variable_pricings": [], + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [ + { + "completions": "0.000018", + "input_cache_read": "0.0000004", + "input_cache_write": "0.00000075", + "prompt": "0.000004", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Gemini", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "input_modalities": ["text", "image", "file", "audio", "video"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Google: Gemini 3 Pro Preview", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "google/gemini-3-pro-preview-20251117", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemini 3 Pro Preview", + "slug": "google/gemini-3-pro-preview", + "updated_at": "2026-01-27T15:11:59.578671+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 524288, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 20000, + "created_at": "2025-10-31T20:43:30.264019+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "description": "gemini-embedding-001 provides a unified cutting edge experience across domains, including science, legal, finance, and coding. This embedding model has consistently held a top spot on the Massive Text Embedding Benchmark (MTEB) Multilingual leaderboard since the experimental launch in March.", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 524288, + "context_length": 20000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -74239,7 +75644,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "3479a1df-3f25-46b3-9e3d-bb323bee5d23", + "id": "b922a8da-940a-4277-8ab0-f3c2a61f89d7", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -74248,186 +75653,207 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 20000, + "created_at": "2025-10-31T20:43:30.264019+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", - "features": {}, - "group": "Llama4", - "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "description": "gemini-embedding-001 provides a unified cutting edge experience across domains, including science, legal, finance, and coding. This embedding model has consistently held a top spot on the Massive Text Embedding Benchmark (MTEB) Multilingual leaderboard since the experimental launch in March.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Gemini", + "has_text_output": false, + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", - "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "reasoning_config": null, + "name": "Google: Gemini Embedding 001", + "output_modalities": ["embeddings"], + "permaslug": "google/gemini-embedding-001", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Gemini Embedding 001", + "slug": "google/gemini-embedding-001", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "model_variant_slug": "meta-llama/llama-4-maverick", + "model_variant_permaslug": "google/gemini-embedding-001", + "model_variant_slug": "google/gemini-embedding-001", "moderation_required": false, - "name": "Google | meta-llama/llama-4-maverick-17b-128e-instruct", + "name": "Google AI Studio | google/gemini-embedding-001", "pricing": { - "completion": "0.00000115", + "completion": "0", "discount": 0, "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000035", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "meta/llama-4-maverick-17b-128e-instruct-maas", - "provider_name": "Google", - "provider_region": "us-east5", - "provider_slug": "google-vertex", + "provider_model_id": "gemini-embedding-001", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "seed", - "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "tools", - "tool_choice" + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama4", - "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Gemini", + "has_text_output": false, + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", - "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "reasoning_config": null, + "name": "Google: Gemini Embedding 001", + "output_modalities": ["embeddings"], + "permaslug": "google/gemini-embedding-001", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Gemini Embedding 001", + "slug": "google/gemini-embedding-001", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 1310720, - "created_at": "2025-04-05T19:31:59.735804+00:00", + "author": "google", + "context_length": 32768, + "created_at": "2025-03-13T21:50:25.140801+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["", "", ""], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 1310720, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", - "training": false + "training": true }, "features": { "supported_parameters": { - "response_format": true, - "structured_outputs": true + "structured_outputs": false }, "supports_tool_choice": { "literal_auto": true, @@ -74438,11 +75864,11 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "6b6ca1d2-f0a3-4b14-afd1-9ec651371658", + "id": "a6489e9a-e430-438f-9aa9-d6a664362e6e", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, @@ -74451,186 +75877,175 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 10000000, - "created_at": "2025-04-05T19:31:59.735804+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-13T21:50:25.140801+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["", "", ""], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)", "features": {}, - "group": "Llama4", + "group": "Gemini", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "google/gemma-3-12b-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 12B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "permaslug": "google/gemma-3-12b-it", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Gemma 3 12B", + "slug": "google/gemma-3-12b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "model_variant_slug": "meta-llama/llama-4-scout", + "model_variant_permaslug": "google/gemma-3-12b-it:free", + "model_variant_slug": "google/gemma-3-12b-it:free", "moderation_required": false, - "name": "Google | meta-llama/llama-4-scout-17b-16e-instruct", + "name": "Google AI Studio | google/gemma-3-12b-it:free", "pricing": { - "completion": "0.0000007", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "meta/llama-4-scout-17b-16e-instruct-maas", - "provider_name": "Google", - "provider_region": "us-east5", - "provider_slug": "google-vertex", + "provider_model_id": "gemma-3-12b-it", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", - "supported_parameters": [ - "structured_outputs", - "response_format", - "max_tokens", - "temperature", - "top_p", - "seed", - "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "tools", - "tool_choice" - ], + "supported_parameters": ["max_tokens", "temperature", "top_p", "seed", "stop"], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": {}, - "group": "Llama4", + "group": "Gemini", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "google/gemma-3-12b-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 12B (free)", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "permaslug": "google/gemma-3-12b-it", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Gemma 3 12B (free)", + "slug": "google/gemma-3-12b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "minimax", - "context_length": 196608, - "created_at": "2025-10-23T20:41:33.120854+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-12T05:12:39.645813+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["", "", ""], "default_system": null, - "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 196608, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", - "training": false + "training": true }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "content-string", - "should_send_reasoning_text_in_text_content": true, + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -74640,231 +76055,216 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "c80588c8-ce64-46f2-b375-19bf4504133e", + "id": "a0fad959-d114-4f6d-bd75-4d2ee389e257", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 196608, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-10-23T20:41:33.120854+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-12T05:12:39.645813+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["", "", ""], "default_system": null, - "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MiniMax: MiniMax M2", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "MiniMax M2", - "slug": "minimax/minimax-m2", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m2", - "model_variant_slug": "minimax/minimax-m2", + "model_variant_permaslug": "google/gemma-3-27b-it:free", + "model_variant_slug": "google/gemma-3-27b-it:free", "moderation_required": false, - "name": "Google | minimax/minimax-m2", + "name": "Google AI Studio | google/gemma-3-27b-it:free", "pricing": { - "completion": "0.0000012", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "minimaxai/minimax-m2-maas", - "provider_name": "Google", + "provider_model_id": "gemma-3-27b-it", + "provider_name": "Google AI Studio", "provider_region": null, - "provider_slug": "google-vertex", + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "seed", "response_format", - "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "structured_outputs", - "tools", - "tool_choice" + "stop" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MiniMax: MiniMax M2", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B (free)", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "MiniMax M2", - "slug": "minimax/minimax-m2", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 3 27B (free)", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "google", + "context_length": 32768, + "created_at": "2025-03-13T22:38:30.653142+00:00", + "default_parameters": {}, + "default_stops": ["", "", ""], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", - "training": false + "training": true }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": { + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -74874,230 +76274,189 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "e841d362-719d-4f53-86c8-f83f5bbd7361", + "id": "90fbd988-266d-4ef4-b345-63b46ab6caca", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "google", + "context_length": 131072, + "created_at": "2025-03-13T22:38:30.653142+00:00", + "default_parameters": {}, + "default_stops": ["", "", ""], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", + "features": {}, + "group": "Gemini", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "google/gemma-3-4b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 4B", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "google/gemma-3-4b-it", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 3 4B", + "slug": "google/gemma-3-4b-it", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "google/gemma-3-4b-it:free", + "model_variant_slug": "google/gemma-3-4b-it:free", "moderation_required": false, - "name": "Google | moonshotai/kimi-k2-thinking-20251106", + "name": "Google AI Studio | google/gemma-3-4b-it:free", "pricing": { - "completion": "0.0000025", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "moonshotai/kimi-k2-thinking-maas", - "provider_name": "Google", + "provider_model_id": "gemma-3-4b-it", + "provider_name": "Google AI Studio", "provider_region": null, - "provider_slug": "google-vertex", + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "seed", "response_format", - "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "structured_outputs", - "tools", - "tool_choice" + "stop" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" - }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } + "variant": "free" }, - "group": "Other", + "features": {}, + "group": "Gemini", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "google/gemma-3-4b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 4B (free)", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "google/gemma-3-4b-it", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 3 4B (free)", + "slug": "google/gemma-3-4b-it", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "google", + "context_length": 8192, + "created_at": "2025-07-09T15:28:24.676165+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture. Based on the MatFormer architecture, it supports nested submodels and modular composition via the Mix-and-Match framework. Gemma 3n models are optimized for low-resource deployment, offering 32K context length and strong multilingual and reasoning performance across common benchmarks. This variant is trained on a diverse corpus including code, math, web, and multimodal data.", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", - "training": false + "training": true }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, + "supports_multipart": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -75107,228 +76466,210 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "93ec7f6d-c7aa-44c1-b9a7-2cd664a17b13", + "id": "d211b1a5-6379-4926-ac35-4f416b01f423", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "google", + "context_length": 8192, + "created_at": "2025-07-09T15:28:24.676165+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Gemma 3n E2B IT is a multimodal, instruction-tuned model developed by Google DeepMind, designed to operate efficiently at an effective parameter size of 2B while leveraging a 6B architecture. Based on the MatFormer architecture, it supports nested submodels and modular composition via the Mix-and-Match framework. Gemma 3n models are optimized for low-resource deployment, offering 32K context length and strong multilingual and reasoning performance across common benchmarks. This variant is trained on a diverse corpus including code, math, web, and multimodal data.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "google/gemma-3n-E2B-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Google: Gemma 3n 2B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "google/gemma-3n-e2b-it", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 3n 2B", + "slug": "google/gemma-3n-e2b-it", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "google/gemma-3n-e2b-it:free", + "model_variant_slug": "google/gemma-3n-e2b-it:free", "moderation_required": false, - "name": "Google | openai/gpt-oss-120b", + "name": "Google AI Studio | google/gemma-3n-e2b-it:free", "pricing": { - "completion": "0.00000036", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" - }, - "provider_model_id": "openai/gpt-oss-120b-maas", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex", + "slug": "google-ai-studio", + "statusPageUrl": null + }, + "provider_model_id": "gemma-3n-e2b-it", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "seed", + "response_format", "stop", "frequency_penalty", - "presence_penalty", - "repetition_penalty", - "top_k" + "presence_penalty" ], - "supports_multipart": true, - "supports_reasoning": true, + "supports_multipart": false, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "google/gemma-3n-E2B-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Google: Gemma 3n 2B (free)", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "google/gemma-3n-e2b-it", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 3n 2B (free)", + "slug": "google/gemma-3n-e2b-it", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "google", + "context_length": 8192, + "created_at": "2025-05-20T21:33:44.157973+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. [Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", - "training": false + "training": true }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -75338,227 +76679,215 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "d14eeefb-d9fc-4732-b0d7-db609aad8308", + "id": "918e82fa-161a-4cc8-b482-d2c2e46e1b9c", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "google", + "context_length": 32000, + "created_at": "2025-05-20T21:33:44.157973+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. [Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "google/gemma-3n-E4B-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Google: Gemma 3n 4B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "google/gemma-3n-e4b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 3n 4B", + "slug": "google/gemma-3n-e4b-it", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "google/gemma-3n-e4b-it:free", + "model_variant_slug": "google/gemma-3n-e4b-it:free", "moderation_required": false, - "name": "Google | openai/gpt-oss-20b", + "name": "Google AI Studio | google/gemma-3n-e4b-it:free", "pricing": { - "completion": "0.00000025", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000007", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-20b-maas", - "provider_name": "Google", - "provider_region": "us-central1", - "provider_slug": "google-vertex", + "provider_model_id": "gemma-3n-e4b-it", + "provider_name": "Google AI Studio", + "provider_region": null, + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "seed", + "response_format", "stop", "frequency_penalty", - "presence_penalty", - "repetition_penalty", - "top_k" + "presence_penalty" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "google/gemma-3n-E4B-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Google: Gemma 3n 4B (free)", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "google/gemma-3n-e4b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 3n 4B (free)", + "slug": "google/gemma-3n-e4b-it", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 65536, + "created_at": "2025-11-20T15:49:57.064095+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GoogleAIStudioGeminiAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 65536, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "is_mandatory_reasoning": true, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -75568,217 +76897,241 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "5369c603-15de-42f2-95d3-1e9559206ff9", + "id": "f5a725e2-8f66-4754-9232-7550685ab3d0", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 250, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "author": "google", + "context_length": 65536, + "created_at": "2025-11-20T15:49:57.064095+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Gemini", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", + "output_modalities": ["image", "text"], + "permaslug": "google/gemini-3-pro-image-preview-20251120", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Nano Banana Pro (Gemini 3 Pro Image Preview)", + "slug": "google/gemini-3-pro-image-preview", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "google/gemini-3-pro-image-preview-20251120", + "model_variant_slug": "google/gemini-3-pro-image-preview", "moderation_required": false, - "name": "Google | qwen/qwen3-235b-a22b-07-25", + "name": "Google AI Studio | google/gemini-3-pro-image-preview-20251120", "pricing": { - "completion": "0.00000088", + "completion": "0.000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000022", - "request": "0", - "web_search": "0" + "image": "0.000002", + "image_output": "0.00012", + "input_cache_read": "0.0000002", + "internal_reasoning": "0.000012", + "prompt": "0.000002" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Google AI Studio", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GoogleAIStudioGeminiAdapter", + "baseUrl": "https://generativelanguage.googleapis.com/v1beta", "byokEnabled": true, "dataPolicy": { "canPublish": false, "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, - "retainsPrompts": false, + "retainsPrompts": true, "termsOfServiceURL": "https://cloud.google.com/terms/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Google AI Studio", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "/images/icons/GoogleAIStudio.svg" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", + "gemini-2.5-pro-1p", + "gemini-2.5-pro-1p-recitation-off", + "gemini-2.5-flash-1p", + "gemini-2.5-flash-1p-recitation-off", + "gemini-2.5-flash-lite-preview-06-11-summarized", + "gemini-2.5-flash", "gemini-2.5-flash-lite-preview-06-17", "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", + "gemini-1.5-pro-latest", + "gemini-1.5-pro", + "gemini-1.5-flash-latest", + "gemini-1.5-flash", + "gemini-1.5-flash-8b", + "gemini-1.5-flash-8b-latest", + "gemini-2.5-pro-preview-03-25", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-2.0-flash-lite-preview", + "gemini-2.0-pro-exp", + "gemini-2.0-flash-thinking-exp", + "gemini-2.5-flash-preview-tts", + "gemini-2.5-pro-preview-tts", + "learnlm-2.0-flash-experimental", + "gracefulgolem", + "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", + "gemini-2.0-flash-exp-image-generation", + "gemini-2.0-flash-preview-image-generation", + "gemini-2.0-flash-lite-preview-02-05", + "gemini-2.0-pro-exp-02-05", + "gemini-exp-1206", + "gemini-2.0-flash-thinking-exp-01-21", + "gemini-2.0-flash-thinking-exp-1219", + "gemma-3-1b-it", + "gemini-flash-latest", + "gemini-flash-lite-latest", + "gemini-pro-latest", + "gemini-robotics-er-1.5-preview", + "gemini-embedding-001", + "riftrunner-fst-rewind", "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "nano-banana-pro-preview", + "fiercefalcon", + "fiercefalcon-inline-citation", + "fiercefalcon-blocked-sites" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", + "name": "Google AI Studio", "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "slug": "google-ai-studio", + "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-235b-a22b-instruct-2507-maas", - "provider_name": "Google", - "provider_region": "us-south1", - "provider_slug": "google-vertex", + "provider_model_id": "gemini-3-pro-image-preview", + "provider_name": "Google AI Studio", + "provider_region": "global", + "provider_slug": "google-ai-studio", "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "seed", + "response_format", "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice" + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", + "group": "Gemini", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", + "output_modalities": ["image", "text"], + "permaslug": "google/gemini-3-pro-image-preview-20251120", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Nano Banana Pro (Gemini 3 Pro Image Preview)", + "slug": "google/gemini-3-pro-image-preview", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null - }, + } + ], + "name": "Google AI Studio", + "slug": "google-ai-studio" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Groq", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + }, + "models": [ { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-23T00:29:06+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, + "privacyPolicyURL": "https://groq.com/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -75788,154 +77141,109 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "803d4906-0d0b-49b6-8705-7ae0a4d45217", + "id": "74af5aaf-6ad5-4389-ae0f-1bd2c7337386", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 80, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 1048576, - "created_at": "2025-07-23T00:29:06+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.1-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "model_variant_slug": "qwen/qwen3-coder", + "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", "moderation_required": false, - "name": "Google | qwen/qwen3-coder-480b-a35b-07-25", + "name": "Groq | meta-llama/llama-3.1-8b-instruct", "pricing": { - "completion": "0.0000018", + "completion": "0.00000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000022", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000025", + "prompt": "0.00000005" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, + "privacyPolicyURL": "https://groq.com/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", - "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", - "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", - "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "qwen/qwen3-coder-480b-a35b-instruct-maas", - "provider_name": "Google", - "provider_region": "us-south1", - "provider_slug": "google-vertex", + "provider_model_id": "llama-3.1-8b-instant", + "provider_name": "Groq", + "provider_region": null, + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "seed", "stop", - "frequency_penalty", - "presence_penalty", - "repetition_penalty", - "top_k", + "seed", + "response_format", "tools", "tool_choice" ], @@ -75945,60 +77253,46 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.1-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, + "privacyPolicyURL": "https://groq.com/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -76008,7 +77302,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "f65277dd-2a8c-4105-a8a1-f47c6d813131", + "id": "0d4cc43e-1b8b-4694-b2d5-57ea43dc0909", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -76017,145 +77311,99 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "Google | qwen/qwen3-next-80b-a3b-instruct-2509", + "name": "Groq | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.0000012", + "completion": "0.00000079", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000059" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, + "privacyPolicyURL": "https://groq.com/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", - "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", - "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google", - "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "qwen/qwen3-next-80b-a3b-instruct-maas", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex", + "provider_model_id": "llama-3.3-70b-versatile", + "provider_name": "Groq", + "provider_region": null, + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "seed", "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", + "seed", + "response_format", "tools", "tool_choice" ], @@ -76165,65 +77413,46 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:38:04.192907+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 131072, + "created_at": "2025-04-05T19:37:02.129674+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", "endpoint": { - "adapter_name": "VertexOpenAIAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, + "privacyPolicyURL": "https://groq.com/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -76233,7 +77462,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "d1deb834-ad6a-43c7-a877-3107eb9ac03f", + "id": "6a734347-7939-41c5-99af-a6d0ac13b1be", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -76242,233 +77471,148 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:38:04.192907+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 1048576, + "created_at": "2025-04-05T19:37:02.129674+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", + "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "model_variant_slug": "meta-llama/llama-4-maverick", "moderation_required": false, - "name": "Google | qwen/qwen3-next-80b-a3b-thinking-2509", + "name": "Groq | meta-llama/llama-4-maverick-17b-128e-instruct", "pricing": { - "completion": "0.0000012", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, - "provider_display_name": "Google Vertex", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "VertexOpenAIAdapter", - "baseUrl": "not_used", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "requiresUserIDs": true, + "privacyPolicyURL": "https://groq.com/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google Vertex", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleVertex.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-exp-03-25", - "gemini-2.0-flash-exp", - "gemini-1.5-flash-002", - "gemini-2.0-flash-lite-001", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-2.0-flash-001", - "llama-4-scout-17b-16e-instruct-maas", - "qwen3-235b-a22b-instruct-2507-maas", - "gemini-2.5-flash", - "gemini-2.5-flash-image-preview", - "llama-4-maverick-17b-128e-instruct-maas", - "llama-3.3-70b-instruct-maas", - "claude-3-5-haiku@20241022", - "qwen3-coder-480b-a35b-instruct-maas", - "gemini-1.5-pro-002", - "gemini-2.5-pro-preview-06-05", - "gemini-2.5-pro-preview-05-06", - "gemini-2.5-pro", - "deepseek-r1-0528-maas", - "claude-3-7-sonnet@20250219", - "claude-3-5-sonnet-v2@20241022", - "claude-sonnet-4@20250514", - "claude-opus-4-1@20250805", - "claude-opus-4@20250514", - "claude-3-haiku@20240307", - "claude-3-5-sonnet@20240620", - "claude-3-opus@20240229", - "gemini-2.5-flash-lite-preview-09-2025", - "gemini-2.5-flash-preview-09-2025", - "gemini-2.5-flash-image", - "claude-sonnet-4-5@20250929", - "claude-haiku-4-5@20251001", - "minimax/minimax-m2-maas", - "gemini-3-pro-preview", - "gemini-3-pro-image-preview", - "claude-opus-4-5@20251101" - ], - "isAbortable": false, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Google", - "owners": ["{}"], - "slug": "google-vertex", - "statusPageUrl": "https://status.cloud.google.com/products/sdXM79fz1FS6ekNpu37K/history" + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" + ], + "isAbortable": false, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "qwen/qwen3-next-80b-a3b-thinking-maas", - "provider_name": "Google", - "provider_region": "global", - "provider_slug": "google-vertex", + "provider_model_id": "meta-llama/llama-4-maverick-17b-128e-instruct", + "provider_name": "Groq", + "provider_region": null, + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "seed", "stop", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", + "seed", + "response_format", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Google", - "slug": "google-vertex" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "displayName": "Google AI Studio", - "headquarters": "US", - "icon": { - "url": "/images/icons/GoogleAIStudio.svg" - }, - "models": [ + }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-02-05T15:30:13.144552+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 131072, + "created_at": "2025-04-05T19:31:59.735804+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 1048576, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { - "supports_base64_video_input": true, - "supports_input_audio": true, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -76478,7 +77622,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "8b6c3ec6-e6a0-43f7-9e09-a5487a5756c9", + "id": "c65a0343-4d7c-4c34-b665-2bddd8cb8431", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -76491,147 +77635,95 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1000000, - "created_at": "2025-02-05T15:30:13.144552+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 10000000, + "created_at": "2025-04-05T19:31:59.735804+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It introduces notable enhancements in multimodal understanding, coding capabilities, complex instruction following, and function calling. These advancements come together to deliver more seamless and robust agentic experiences.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text", "image"], "instruct_type": null, - "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", - "name": "Google: Gemini 2.0 Flash", + "model_version_group_id": null, + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "google/gemini-2.0-flash-001", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Gemini 2.0 Flash", - "slug": "google/gemini-2.0-flash-001", - "updated_at": "2025-11-14T23:34:05.685679+00:00", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.0-flash-001", - "model_variant_slug": "google/gemini-2.0-flash-001", + "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "model_variant_slug": "meta-llama/llama-4-scout", "moderation_required": false, - "name": "Google AI Studio | google/gemini-2.0-flash-001", + "name": "Groq | meta-llama/llama-4-scout-17b-16e-instruct", "pricing": { - "completion": "0.0000004", + "completion": "0.00000034", "discount": 0, - "image": "0.0000258", - "image_output": "0", - "input_cache_read": "0.000000025", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000011" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", - "owners": ["{}"], - "slug": "google-ai-studio", - "statusPageUrl": null + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "gemini-2.0-flash-001", - "provider_name": "Google AI Studio", + "provider_model_id": "meta-llama/llama-4-scout-17b-16e-instruct", + "provider_name": "Groq", "provider_region": null, - "provider_slug": "google-ai-studio", + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", "top_p", + "stop", "seed", "response_format", - "stop", - "structured_outputs", "tools", "tool_choice" ], @@ -76641,62 +77733,46 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text", "image"], "instruct_type": null, - "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", - "name": "Google: Gemini 2.0 Flash", + "model_version_group_id": null, + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "google/gemini-2.0-flash-001", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Gemini 2.0 Flash", - "slug": "google/gemini-2.0-flash-001", - "updated_at": "2025-11-14T23:34:05.685679+00:00", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-02-25T17:56:52.206054+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 131072, + "created_at": "2025-04-30T01:06:33.531556+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", + "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 1048576, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { - "supports_base64_video_input": true, - "supports_input_audio": true, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -76706,437 +77782,324 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "cae2e26e-549c-494f-b613-99783f016f8b", + "id": "d058b005-6392-46b7-a1c6-df20e57550ba", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, - "max_prompt_tokens": 1048576, + "max_completion_tokens": 1024, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-02-25T17:56:52.206054+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 163840, + "created_at": "2025-04-30T01:06:33.531556+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5), all at extremely economical token prices.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", + "features": {}, + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-Guard-4-12B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["image", "text"], "instruct_type": null, - "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", - "name": "Google: Gemini 2.0 Flash Lite", + "model_version_group_id": null, + "name": "Meta: Llama Guard 4 12B", "output_modalities": ["text"], - "permaslug": "google/gemini-2.0-flash-lite-001", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-guard-4-12b", + "reasoning_config": null, "router": null, - "short_name": "Gemini 2.0 Flash Lite", - "slug": "google/gemini-2.0-flash-lite-001", - "updated_at": "2025-11-14T23:32:47.563595+00:00", + "short_name": "Llama Guard 4 12B", + "slug": "meta-llama/llama-guard-4-12b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.0-flash-lite-001", - "model_variant_slug": "google/gemini-2.0-flash-lite-001", + "model_variant_permaslug": "meta-llama/llama-guard-4-12b", + "model_variant_slug": "meta-llama/llama-guard-4-12b", "moderation_required": false, - "name": "Google AI Studio | google/gemini-2.0-flash-lite-001", + "name": "Groq | meta-llama/llama-guard-4-12b", "pricing": { - "completion": "0.0000003", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000075", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", - "owners": ["{}"], - "slug": "google-ai-studio", - "statusPageUrl": null + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "gemini-2.0-flash-lite-001", - "provider_name": "Google AI Studio", + "provider_model_id": "meta-llama/llama-guard-4-12b", + "provider_name": "Groq", "provider_region": null, - "provider_slug": "google-ai-studio", + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", "top_p", - "seed", - "response_format", "stop", - "structured_outputs", - "tools", - "tool_choice" + "seed", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "features": {}, + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-Guard-4-12B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["image", "text"], "instruct_type": null, - "model_version_group_id": "e993dfbf-2cbd-4680-b866-c05bbdcc8f4d", - "name": "Google: Gemini 2.0 Flash Lite", + "model_version_group_id": null, + "name": "Meta: Llama Guard 4 12B", "output_modalities": ["text"], - "permaslug": "google/gemini-2.0-flash-lite-001", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-guard-4-12b", + "reasoning_config": null, "router": null, - "short_name": "Gemini 2.0 Flash Lite", - "slug": "google/gemini-2.0-flash-lite-001", - "updated_at": "2025-11-14T23:32:47.563595+00:00", + "short_name": "Llama Guard 4 12B", + "slug": "meta-llama/llama-guard-4-12b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-06-17T15:01:28.103313+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 1048576, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { "supported_parameters": { "response_format": true, - "structured_outputs": true + "structured_outputs": false }, - "supports_base64_video_input": true, - "supports_input_audio": true, + "supports_implicit_caching": true, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": true + } }, "has_chat_completions": true, "has_completions": false, - "id": "16c01e26-9b91-4d22-b487-ed35634b017d", + "id": "4b9124a0-ba41-46f5-8927-6e6a43a549af", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 150, "limit_rpm_cf": null, - "max_completion_tokens": 65535, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-06-17T15:01:28.103313+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash", - "slug": "google/gemini-2.5-flash", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash", - "model_variant_slug": "google/gemini-2.5-flash", + "model_variant_permaslug": "moonshotai/kimi-k2-0905", + "model_variant_slug": "moonshotai/kimi-k2-0905", "moderation_required": false, - "name": "Google AI Studio | google/gemini-2.5-flash", + "name": "Groq | moonshotai/kimi-k2-0905", "pricing": { - "completion": "0.0000025", + "completion": "0.000003", "discount": 0, - "image": "0.0012384", - "image_output": "0", - "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000005", + "prompt": "0.000001" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", - "owners": ["{}"], - "slug": "google-ai-studio", - "statusPageUrl": null + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "gemini-2.5-flash", - "provider_name": "Google AI Studio", + "provider_model_id": "moonshotai/kimi-k2-instruct-0905", + "provider_name": "Groq", "provider_region": null, - "provider_slug": "google-ai-studio", + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", "response_format", "max_tokens", "temperature", "top_p", - "seed", "stop", + "seed", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash", - "slug": "google/gemini-2.5-flash", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 32768, - "created_at": "2025-10-07T20:53:51+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -77144,19 +78107,20 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { + "is_mandatory_reasoning": true, "supported_parameters": {}, "supports_input_audio": false, "supports_tool_choice": { @@ -77168,22 +78132,22 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "2341e1bc-99ea-46ab-ae2d-9a83846afb43", + "id": "c25b1e3a-a24e-4259-ab3e-06d1e50fcf39", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 1000, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 32768, - "created_at": "2025-10-07T20:53:51+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -77191,174 +78155,147 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-2.5-flash-image", + "name": "OpenAI: gpt-oss-120b", + "output_modalities": ["text"], + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Image (Nano Banana)", - "slug": "google/gemini-2.5-flash-image", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-image", - "model_variant_slug": "google/gemini-2.5-flash-image", + "model_variant_permaslug": "openai/gpt-oss-120b:exacto", + "model_variant_slug": "openai/gpt-oss-120b:exacto", "moderation_required": false, - "name": "Google AI Studio | google/gemini-2.5-flash-image", + "name": "Groq | openai/gpt-oss-120b:exacto", "pricing": { - "completion": "0.0000025", + "completion": "0.0000006", "discount": 0, - "image": "0.001238", - "image_output": "0.00003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000075", + "prompt": "0.00000015" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", - "owners": ["{}"], - "slug": "google-ai-studio", - "statusPageUrl": null + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "gemini-2.5-flash-image", - "provider_name": "Google AI Studio", + "provider_model_id": "openai/gpt-oss-120b", + "provider_name": "Groq", "provider_region": null, - "provider_slug": "google-ai-studio", + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", + "stop", "seed", "response_format", + "tools", + "tool_choice", "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "exacto" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-2.5-flash-image", + "name": "OpenAI: gpt-oss-120b (exacto)", + "output_modalities": ["text"], + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Image (Nano Banana)", - "slug": "google/gemini-2.5-flash-image", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b (exacto)", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "google", - "context_length": 32768, - "created_at": "2025-08-26T14:36:17+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -77366,19 +78303,20 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Image Preview, a.k.a. \"Nano Banana,\" is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { + "is_mandatory_reasoning": true, "supported_parameters": {}, "supports_input_audio": false, "supports_tool_choice": { @@ -77390,22 +78328,22 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "3910bca9-4cd9-41a4-878c-13b91c06afb8", + "id": "60654a45-42cc-48ee-8b1d-47bd44d0ecb0", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 1000, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 32768, - "created_at": "2025-08-26T14:36:17+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -77413,174 +78351,151 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Image Preview, a.k.a. \"Nano Banana,\" is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations.", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Image Preview (Nano Banana)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-2.5-flash-image-preview", + "name": "OpenAI: gpt-oss-20b", + "output_modalities": ["text"], + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Image Preview (Nano Banana)", - "slug": "google/gemini-2.5-flash-image-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-image-preview", - "model_variant_slug": "google/gemini-2.5-flash-image-preview", + "model_variant_permaslug": "openai/gpt-oss-20b", + "model_variant_slug": "openai/gpt-oss-20b", "moderation_required": false, - "name": "Google AI Studio | google/gemini-2.5-flash-image-preview", + "name": "Groq | openai/gpt-oss-20b", "pricing": { - "completion": "0.0000025", + "completion": "0.0000003", "discount": 0, - "image": "0.001238", - "image_output": "0.00003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000000375", + "prompt": "0.000000075" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" - ], + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" + ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", - "owners": ["{}"], - "slug": "google-ai-studio", - "statusPageUrl": null + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "gemini-2.5-flash-image-preview", - "provider_name": "Google AI Studio", + "provider_model_id": "openai/gpt-oss-20b", + "provider_name": "Groq", "provider_region": null, - "provider_slug": "google-ai-studio", + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", + "stop", "seed", "response_format", + "tools", + "tool_choice", "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Image Preview (Nano Banana)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-2.5-flash-image-preview", + "name": "OpenAI: gpt-oss-20b", + "output_modalities": ["text"], + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Image Preview (Nano Banana)", - "slug": "google/gemini-2.5-flash-image-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-07-22T16:04:36.283638+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-10-29T15:47:16.557286+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -77588,51 +78503,46 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", + "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 1048576, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_base64_video_input": true, - "supports_input_audio": true, + "is_mandatory_reasoning": true, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": true + } }, "has_chat_completions": true, "has_completions": false, - "id": "ce839073-aa24-4f29-8358-15b319bd05ec", + "id": "d83e34dd-4624-428d-8c3e-524be778e27f", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65535, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-07-22T16:04:36.283638+00:00", + "author": "openai", + "context_length": 0, + "created_at": "2025-10-29T15:47:16.557286+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -77640,139 +78550,103 @@ }, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", + "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-safeguard-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Lite", + "name": "OpenAI: gpt-oss-safeguard-20b", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-lite", + "permaslug": "openai/gpt-oss-safeguard-20b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Lite", - "slug": "google/gemini-2.5-flash-lite", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-safeguard-20b", + "slug": "openai/gpt-oss-safeguard-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-lite", - "model_variant_slug": "google/gemini-2.5-flash-lite", + "model_variant_permaslug": "openai/gpt-oss-safeguard-20b", + "model_variant_slug": "openai/gpt-oss-safeguard-20b", "moderation_required": false, - "name": "Google AI Studio | google/gemini-2.5-flash-lite", + "name": "Groq | openai/gpt-oss-safeguard-20b", "pricing": { - "completion": "0.0000004", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000025", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000037", + "prompt": "0.000000075" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", - "owners": ["{}"], - "slug": "google-ai-studio", - "statusPageUrl": null + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "gemini-2.5-flash-lite", - "provider_name": "Google AI Studio", + "provider_model_id": "openai/gpt-oss-safeguard-20b", + "provider_name": "Groq", "provider_region": null, - "provider_slug": "google-ai-studio", + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "seed", "stop", + "seed", + "response_format", "tools", "tool_choice" ], @@ -77785,75 +78659,63 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-safeguard-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Lite", + "name": "OpenAI: gpt-oss-safeguard-20b", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-lite", + "permaslug": "openai/gpt-oss-safeguard-20b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Flash Lite", - "slug": "google/gemini-2.5-flash-lite", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-safeguard-20b", + "slug": "openai/gpt-oss-safeguard-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-09-25T17:01:26.198818+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 131072, + "created_at": "2025-04-28T21:32:25.189881+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", + "adapter_name": "GroqAdapter", "can_abort": false, - "context_length": 1048576, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_base64_video_input": true, - "supports_file_urls": true, - "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": true + } }, "has_chat_completions": true, "has_completions": false, - "id": "fe48694e-e3fb-43a0-98fd-ca364053a9de", + "id": "e74c0abb-6cef-4454-9bf1-d72b44194b6f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -77862,234 +78724,191 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 40960, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-09-25T17:01:26.198818+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 131072, + "created_at": "2025-04-28T21:32:25.189881+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance across common benchmarks compared to earlier Flash models. By default, \"thinking\" (i.e. multi-pass reasoning) is disabled to prioritize speed, but developers can enable it via the [Reasoning API parameter](https://openrouter.ai/docs/use-cases/reasoning-tokens) to selectively trade off cost for intelligence. ", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", + "permaslug": "qwen/qwen3-32b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Gemini 2.5 Flash Lite Preview 09-2025", - "slug": "google/gemini-2.5-flash-lite-preview-09-2025", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", - "model_variant_slug": "google/gemini-2.5-flash-lite-preview-09-2025", + "model_variant_permaslug": "qwen/qwen3-32b-04-28", + "model_variant_slug": "qwen/qwen3-32b", "moderation_required": false, - "name": "Google AI Studio | google/gemini-2.5-flash-lite-preview-09-2025", + "name": "Groq | qwen/qwen3-32b-04-28", "pricing": { - "completion": "0.0000004", + "completion": "0.00000059", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000145", + "prompt": "0.00000029" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Groq", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "GroqAdapter", + "baseUrl": "https://api.groq.com/openai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://groq.com/privacy-policy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://groq.com/terms-of-use/", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Groq", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" }, "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" + "groq/compound-mini", + "groq/compound", + "playai-tts-arabic", + "playai-tts", + "whisper-large-v3-turbo", + "meta-llama/llama-prompt-guard-2-22m", + "whisper-large-v3", + "allam-2-7b", + "meta-llama/llama-prompt-guard-2-86m", + "moonshotai/kimi-k2-instruct", + "canopylabs/orpheus-v1-english", + "canopylabs/orpheus-arabic-saudi" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", - "owners": ["{}"], - "slug": "google-ai-studio", - "statusPageUrl": null + "name": "Groq", + "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], + "slug": "groq", + "statusPageUrl": "https://status.groq.com/" }, - "provider_model_id": "gemini-2.5-flash-lite-preview-09-2025", - "provider_name": "Google AI Studio", + "provider_model_id": "qwen/qwen3-32b", + "provider_name": "Groq", "provider_region": null, - "provider_slug": "google-ai-studio", + "provider_slug": "groq", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "seed", "stop", - "tools", - "tool_choice" + "seed", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Lite Preview 09-2025", + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-lite-preview-09-2025", + "permaslug": "qwen/qwen3-32b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Gemini 2.5 Flash Lite Preview 09-2025", - "slug": "google/gemini-2.5-flash-lite-preview-09-2025", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, + } + ], + "name": "Groq", + "slug": "groq" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Hyperbolic", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + }, + "models": [ { - "author": "google", - "context_length": 1048576, - "created_at": "2025-09-25T17:09:38.646963+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", - "can_abort": false, - "context_length": 1048576, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_base64_video_input": true, - "supports_file_urls": true, - "supports_implicit_caching": true, - "supports_input_audio": true, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": true + } }, "has_chat_completions": true, - "has_completions": false, - "id": "ab314496-abd4-4392-8e64-f021533bf6bf", + "has_completions": true, + "id": "a79e5b0c-4067-4388-ac6f-b4d794d43201", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -78098,953 +78917,621 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-09-25T17:09:38.646963+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "features": {}, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "file", "text", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Preview 09-2025", + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-preview-09-2025", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "Gemini 2.5 Flash Preview 09-2025", - "slug": "google/gemini-2.5-flash-preview-09-2025", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-flash-preview-09-2025", - "model_variant_slug": "google/gemini-2.5-flash-preview-09-2025", + "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", + "model_variant_slug": "deepseek/deepseek-chat-v3-0324", "moderation_required": false, - "name": "Google AI Studio | google/gemini-2.5-flash-preview-09-2025", + "name": "Hyperbolic | deepseek/deepseek-chat-v3-0324", "pricing": { - "completion": "0.0000025", + "completion": "0.00000125", "discount": 0, - "image": "0.001238", - "image_output": "0", - "input_cache_read": "0.000000075", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000125" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", + "name": "Hyperbolic", "owners": ["{}"], - "slug": "google-ai-studio", + "slug": "hyperbolic", "statusPageUrl": null }, - "provider_model_id": "gemini-2.5-flash-preview-09-2025", - "provider_name": "Google AI Studio", + "provider_model_id": "deepseek-ai/DeepSeek-V3-0324", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "google-ai-studio", - "quantization": "unknown", + "provider_slug": "hyperbolic/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "seed", "stop", - "tools", - "tool_choice" + "frequency_penalty", + "presence_penalty", + "seed", + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "features": {}, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "file", "text", "audio", "video"], + "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 2.5 Flash Preview 09-2025", + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-flash-preview-09-2025", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "Gemini 2.5 Flash Preview 09-2025", - "slug": "google/gemini-2.5-flash-preview-09-2025", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-06-17T14:12:24+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", - "can_abort": false, - "context_length": 1048576, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_base64_video_input": true, - "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": true + } }, "has_chat_completions": true, - "has_completions": false, - "id": "b261d15b-95ce-4c8c-8bce-00eb0dc1ff77", + "has_completions": true, + "id": "5877b9a4-458b-44b5-85b4-a772f0363720", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-06-17T14:12:24+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy and nuanced context handling. Gemini 2.5 Pro achieves top-tier performance on multiple benchmarks, including first-place positioning on the LMArena leaderboard, reflecting superior human-preference alignment and complex problem-solving abilities.", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Google: Gemini 2.5 Pro", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-pro", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Pro", - "slug": "google/gemini-2.5-pro", - "updated_at": "2026-01-08T23:55:54.79011+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-2.5-pro", - "model_variant_slug": "google/gemini-2.5-pro", + "model_variant_permaslug": "deepseek/deepseek-r1-0528", + "model_variant_slug": "deepseek/deepseek-r1-0528", "moderation_required": false, - "name": "Google AI Studio | google/gemini-2.5-pro", + "name": "Hyperbolic | deepseek/deepseek-r1-0528", "pricing": { - "completion": "0.00001", + "completion": "0.000003", "discount": 0, - "image": "0.00516", - "image_output": "0", - "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.000003" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", + "name": "Hyperbolic", "owners": ["{}"], - "slug": "google-ai-studio", + "slug": "hyperbolic", "statusPageUrl": null }, - "provider_model_id": "gemini-2.5-pro", - "provider_name": "Google AI Studio", + "provider_model_id": "deepseek-ai/DeepSeek-R1-0528", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "google-ai-studio", + "provider_slug": "hyperbolic", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "seed", "stop", + "frequency_penalty", + "presence_penalty", + "seed", + "logit_bias", + "top_k", + "min_p", + "repetition_penalty", "tools", "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "completions": "0.000015", - "input_cache_read": "0.00000025", - "input_cache_write": "0.000002875", - "prompt": "0.0000025", - "threshold": 200000, - "type": "prompt-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Gemini", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Google: Gemini 2.5 Pro", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "google/gemini-2.5-pro", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Gemini 2.5 Pro", - "slug": "google/gemini-2.5-pro", - "updated_at": "2026-01-08T23:55:54.79011+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-12-17T15:57:58+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 32768, + "created_at": "2024-08-02T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.\n\nThe model supports a 1M token context window and multimodal inputs including text, images, audio, video, and PDFs, with text output. It includes configurable reasoning via thinking levels (minimal, low, medium, high), structured output, tool use, and automatic context caching. Gemini 3 Flash Preview is optimized for users who want strong reasoning and agentic behavior without the cost or latency of full scale frontier models.", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", - "can_abort": false, - "context_length": 1048576, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "supports_base64_video_input": true, - "supports_file_urls": true, - "supports_implicit_caching": true, - "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": true + } }, "has_chat_completions": true, - "has_completions": false, - "id": "6e82f652-2693-4b60-bbe0-7362d6e345f0", + "has_completions": true, + "id": "e72d3b56-eee0-4422-a1c8-3456ebdb105e", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 400, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-12-17T15:57:58+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-08-02T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool use performance with substantially lower latency than larger Gemini variants, making it well suited for interactive development, long running agent loops, and collaborative coding tasks. Compared to Gemini 2.5 Flash, it provides broad quality improvements across reasoning, multimodal understanding, and reliability.\n\nThe model supports a 1M token context window and multimodal inputs including text, images, audio, video, and PDFs, with text output. It includes configurable reasoning via thinking levels (minimal, low, medium, high), structured output, tool use, and automatic context caching. Gemini 3 Flash Preview is optimized for users who want strong reasoning and agentic behavior without the cost or latency of full scale frontier models.", - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/llama-3.1-405B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 3 Flash Preview", + "input_modalities": ["text"], + "instruct_type": "none", + "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", + "name": "Meta: Llama 3.1 405B (base)", "output_modalities": ["text"], - "permaslug": "google/gemini-3-flash-preview-20251217", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.1-405b", + "reasoning_config": null, "router": null, - "short_name": "Gemini 3 Flash Preview", - "slug": "google/gemini-3-flash-preview", - "updated_at": "2025-12-17T16:17:44.159277+00:00", + "short_name": "Llama 3.1 405B (base)", + "slug": "meta-llama/llama-3.1-405b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-3-flash-preview-20251217", - "model_variant_slug": "google/gemini-3-flash-preview", + "model_variant_permaslug": "meta-llama/llama-3.1-405b", + "model_variant_slug": "meta-llama/llama-3.1-405b", "moderation_required": false, - "name": "Google AI Studio | google/gemini-3-flash-preview-20251217", + "name": "Hyperbolic | meta-llama/llama-3.1-405b", "pricing": { - "completion": "0.000003", + "completion": "0.000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000005", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.000004" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", + "name": "Hyperbolic", "owners": ["{}"], - "slug": "google-ai-studio", + "slug": "hyperbolic", "statusPageUrl": null }, - "provider_model_id": "gemini-3-flash-preview", - "provider_name": "Google AI Studio", + "provider_model_id": "meta-llama/Meta-Llama-3.1-405B", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "google-ai-studio", - "quantization": "unknown", + "provider_slug": "hyperbolic/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", - "seed", - "response_format", "stop", - "structured_outputs", - "tools", - "tool_choice" + "frequency_penalty", + "presence_penalty", + "seed", + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/llama-3.1-405B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 3 Flash Preview", + "input_modalities": ["text"], + "instruct_type": "none", + "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", + "name": "Meta: Llama 3.1 405B (base)", "output_modalities": ["text"], - "permaslug": "google/gemini-3-flash-preview-20251217", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.1-405b", + "reasoning_config": null, "router": null, - "short_name": "Gemini 3 Flash Preview", - "slug": "google/gemini-3-flash-preview", - "updated_at": "2025-12-17T16:17:44.159277+00:00", + "short_name": "Llama 3.1 405B (base)", + "slug": "meta-llama/llama-3.1-405b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 1048576, - "created_at": "2025-11-18T14:04:28+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131000, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", + "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", - "can_abort": false, - "context_length": 1048576, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 131000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_base64_video_input": true, - "supports_file_urls": true, - "supports_input_audio": true, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": true + } }, "has_chat_completions": true, - "has_completions": false, - "id": "c9badacd-c552-40a2-86e6-4b4e8dd4318f", + "has_completions": true, + "id": "d74f1663-e371-4337-b1c2-59fc3447e189", "is_byok": false, - "is_deranked": true, + "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 400, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 1048576, - "created_at": "2025-11-18T14:04:28+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.", - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 3 Pro Preview", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", + "name": "Meta: Llama 3.1 405B Instruct", "output_modalities": ["text"], - "permaslug": "google/gemini-3-pro-preview-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.1-405b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Gemini 3 Pro Preview", - "slug": "google/gemini-3-pro-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3.1 405B Instruct", + "slug": "meta-llama/llama-3.1-405b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-3-pro-preview-20251117", - "model_variant_slug": "google/gemini-3-pro-preview", + "model_variant_permaslug": "meta-llama/llama-3.1-405b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-405b-instruct", "moderation_required": false, - "name": "Google AI Studio | google/gemini-3-pro-preview-20251117", + "name": "Hyperbolic | meta-llama/llama-3.1-405b-instruct", "pricing": { - "completion": "0.000012", + "completion": "0.000004", "discount": 0, - "image": "0.008256", - "image_output": "0", - "input_cache_read": "0.0000002", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0" + "prompt": "0.000004" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", + "name": "Hyperbolic", "owners": ["{}"], - "slug": "google-ai-studio", + "slug": "hyperbolic", "statusPageUrl": null }, - "provider_model_id": "gemini-3-pro-preview", - "provider_name": "Google AI Studio", + "provider_model_id": "meta-llama/Meta-Llama-3.1-405B-Instruct", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "google-ai-studio", - "quantization": "unknown", + "provider_slug": "hyperbolic/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "seed", "stop", - "tools", - "tool_choice" + "frequency_penalty", + "presence_penalty", + "seed", + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, - "variable_pricings": [ - { - "completions": "0.000018", - "input_cache_read": "0.0000004", - "input_cache_write": "0.000004375", - "prompt": "0.000004", - "threshold": 200000, - "type": "prompt-threshold" - } - ], + "supports_reasoning": false, + "supports_tool_parameters": false, + "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file", "audio", "video"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini 3 Pro Preview", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", + "name": "Meta: Llama 3.1 405B Instruct", "output_modalities": ["text"], - "permaslug": "google/gemini-3-pro-preview-20251117", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.1-405b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Gemini 3 Pro Preview", - "slug": "google/gemini-3-pro-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3.1 405B Instruct", + "slug": "meta-llama/llama-3.1-405b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 20000, - "created_at": "2025-10-31T20:43:30.264019+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "gemini-embedding-001 provides a unified cutting edge experience across domains, including science, legal, finance, and coding. This embedding model has consistently held a top spot on the Massive Text Embedding Benchmark (MTEB) Multilingual leaderboard since the experimental launch in March.", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", - "can_abort": false, - "context_length": 20000, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -79053,8 +79540,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "b922a8da-940a-4277-8ab0-f3c2a61f89d7", + "has_completions": true, + "id": "c0841471-73f1-4511-b47f-ac2643802026", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -79067,145 +79554,87 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 20000, - "created_at": "2025-10-31T20:43:30.264019+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "gemini-embedding-001 provides a unified cutting edge experience across domains, including science, legal, finance, and coding. This embedding model has consistently held a top spot on the Massive Text Embedding Benchmark (MTEB) Multilingual leaderboard since the experimental launch in March.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", - "has_text_output": false, - "hf_slug": null, + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini Embedding 001", - "output_modalities": ["embeddings"], - "permaslug": "google/gemini-embedding-001", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.1 70B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.1-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Gemini Embedding 001", - "slug": "google/gemini-embedding-001", + "short_name": "Llama 3.1 70B Instruct", + "slug": "meta-llama/llama-3.1-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-embedding-001", - "model_variant_slug": "google/gemini-embedding-001", + "model_variant_permaslug": "meta-llama/llama-3.1-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-70b-instruct", "moderation_required": false, - "name": "Google AI Studio | google/gemini-embedding-001", + "name": "Hyperbolic | meta-llama/llama-3.1-70b-instruct", "pricing": { - "completion": "0", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta/openai", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", + "name": "Hyperbolic", "owners": ["{}"], - "slug": "google-ai-studio", + "slug": "hyperbolic", "statusPageUrl": null }, - "provider_model_id": "gemini-embedding-001", - "provider_name": "Google AI Studio", + "provider_model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "google-ai-studio", - "quantization": "unknown", + "provider_slug": "hyperbolic/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", "top_p", + "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format", - "structured_outputs" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, @@ -79213,62 +79642,46 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", - "has_text_output": false, - "hf_slug": null, + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Gemini Embedding 001", - "output_modalities": ["embeddings"], - "permaslug": "google/gemini-embedding-001", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.1 70B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.1-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Gemini Embedding 001", - "slug": "google/gemini-embedding-001", + "short_name": "Llama 3.1 70B Instruct", + "slug": "meta-llama/llama-3.1-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 65536, - "created_at": "2025-11-20T15:49:57.064095+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { - "adapter_name": "GoogleAIStudioGeminiAdapter", - "can_abort": false, - "context_length": 65536, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -79277,240 +79690,147 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "f5a725e2-8f66-4754-9232-7550685ab3d0", + "has_completions": true, + "id": "f8bced68-f2ab-4d93-808f-59f6ccc1dd7c", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 250, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 65536, - "created_at": "2025-11-20T15:49:57.064095+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and high-fidelity visual synthesis. The model generates context-rich graphics, from infographics and diagrams to cinematic composites, and can incorporate real-time information via Search grounding.\n\nIt offers industry-leading text rendering in images (including long passages and multilingual layouts), consistent multi-image blending, and accurate identity preservation across up to five subjects. Nano Banana Pro adds fine-grained creative controls such as localized edits, lighting and focus adjustments, camera transformations, and support for 2K/4K outputs and flexible aspect ratios. It is designed for professional-grade design, product visualization, storyboarding, and complex multi-element compositions while remaining efficient for general image creation workflows.", - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-3-pro-image-preview-20251120", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.1-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Nano Banana Pro (Gemini 3 Pro Image Preview)", - "slug": "google/gemini-3-pro-image-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemini-3-pro-image-preview-20251120", - "model_variant_slug": "google/gemini-3-pro-image-preview", + "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", "moderation_required": false, - "name": "Google AI Studio | google/gemini-3-pro-image-preview-20251120", + "name": "Hyperbolic | meta-llama/llama-3.1-8b-instruct", "pricing": { - "completion": "0.000012", + "completion": "0.0000001", "discount": 0, - "image": "0.067", - "image_output": "0.00012", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "Google AI Studio", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GoogleAIStudioGeminiAdapter", - "baseUrl": "https://generativelanguage.googleapis.com/v1beta", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://cloud.google.com/terms/cloud-privacy-notice", - "retainsPrompts": true, - "termsOfServiceURL": "https://cloud.google.com/terms/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Google AI Studio", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "/images/icons/GoogleAIStudio.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "gemini-2.5-pro-1p", - "gemini-2.5-pro-1p-recitation-off", - "gemini-2.5-flash-1p", - "gemini-2.5-flash-1p-recitation-off", - "gemini-2.5-flash-lite-preview-06-11-summarized", - "gemini-2.5-flash", - "gemini-2.5-flash-lite-preview-06-17", - "gemini-2.5-flash-lite", - "gemini-1.5-pro-latest", - "gemini-1.5-pro", - "gemini-1.5-flash-latest", - "gemini-1.5-flash", - "gemini-1.5-flash-8b", - "gemini-1.5-flash-8b-latest", - "gemini-2.5-pro-preview-03-25", - "gemini-2.0-flash", - "gemini-2.0-flash-lite", - "gemini-2.0-flash-lite-preview", - "gemini-2.0-pro-exp", - "gemini-2.0-flash-thinking-exp", - "gemini-2.5-flash-preview-tts", - "gemini-2.5-pro-preview-tts", - "learnlm-2.0-flash-experimental", - "gracefulgolem", - "gemini-2.5-flash-preview-05-20", - "gemini-2.5-pro-preview-06-05", - "gemini-2.0-flash-exp-image-generation", - "gemini-2.0-flash-preview-image-generation", - "gemini-2.0-flash-lite-preview-02-05", - "gemini-2.0-pro-exp-02-05", - "gemini-exp-1206", - "gemini-2.0-flash-thinking-exp-01-21", - "gemini-2.0-flash-thinking-exp-1219", - "gemma-3-1b-it", - "gemini-flash-latest", - "gemini-flash-lite-latest", - "gemini-pro-latest", - "gemini-robotics-er-1.5-preview", - "gemini-embedding-001", - "riftrunner-fst-rewind", - "gemini-3-pro-image-preview", - "nano-banana-pro-preview" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Google AI Studio", + "name": "Hyperbolic", "owners": ["{}"], - "slug": "google-ai-studio", + "slug": "hyperbolic", "statusPageUrl": null }, - "provider_model_id": "gemini-3-pro-image-preview", - "provider_name": "Google AI Studio", - "provider_region": "global", - "provider_slug": "google-ai-studio", - "quantization": "unknown", + "provider_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "provider_name": "Hyperbolic", + "provider_region": null, + "provider_slug": "hyperbolic/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", - "seed", - "response_format", "stop", - "structured_outputs" + "frequency_penalty", + "presence_penalty", + "seed", + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)", - "output_modalities": ["image", "text"], - "permaslug": "google/gemini-3-pro-image-preview-20251120", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.1-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Nano Banana Pro (Gemini 3 Pro Image Preview)", - "slug": "google/gemini-3-pro-image-preview", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Google AI Studio", - "slug": "google-ai-studio" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Groq", - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" - }, - "models": [ + }, { "author": "meta-llama", "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "created_at": "2024-09-25T00:00:00+00:00", "default_parameters": {}, "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -79519,8 +79839,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "74af5aaf-6ad5-4389-ae0f-1bd2c7337386", + "has_completions": true, + "id": "9a1f3f30-cc71-43ce-b113-ab824c0e2332", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -79529,129 +79849,114 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "meta-llama", "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "created_at": "2024-09-25T00:00:00+00:00", "default_parameters": {}, "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "model_version_group_id": null, + "name": "Meta: Llama 3.2 3B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", + "permaslug": "meta-llama/llama-3.2-3b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "Llama 3.2 3B Instruct", + "slug": "meta-llama/llama-3.2-3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_permaslug": "meta-llama/llama-3.2-3b-instruct", + "model_variant_slug": "meta-llama/llama-3.2-3b-instruct", "moderation_required": false, - "name": "Groq | meta-llama/llama-3.1-8b-instruct", + "name": "Hyperbolic | meta-llama/llama-3.2-3b-instruct", "pricing": { - "completion": "0.00000008", + "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "llama-3.1-8b-instant", - "provider_name": "Groq", + "provider_model_id": "meta-llama/Llama-3.2-3B-Instruct", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", - "quantization": "unknown", + "provider_slug": "hyperbolic/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format", - "tools", - "tool_choice" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "model_version_group_id": null, + "name": "Meta: Llama 3.2 3B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", + "permaslug": "meta-llama/llama-3.2-3b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "Llama 3.2 3B Instruct", + "slug": "meta-llama/llama-3.2-3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, @@ -79664,18 +79969,17 @@ "default_system": null, "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -79684,8 +79988,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "0d4cc43e-1b8b-4694-b2d5-57ea43dc0909", + "has_completions": true, + "id": "3bb0202a-27ee-4f2d-8180-4de5c3157276", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -79694,7 +79998,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -79727,77 +80031,62 @@ "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "Groq | meta-llama/llama-3.3-70b-instruct", + "name": "Hyperbolic | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.00000079", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000059", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", - "byokEnabled": true, + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", + "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "llama-3.3-70b-versatile", - "provider_name": "Groq", + "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", - "quantization": "unknown", + "provider_slug": "hyperbolic/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format", - "tools", - "tool_choice" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -79821,22 +80110,24 @@ "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 32768, + "created_at": "2024-09-10T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, - "context_length": 131072, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { @@ -79849,8 +80140,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "6a734347-7939-41c5-99af-a6d0ac13b1be", + "has_completions": true, + "id": "b550f7af-571a-45fd-b442-b3327afaf38c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -79859,149 +80150,136 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 4096, + "created_at": "2024-09-10T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "mistralai/Pixtral-12B-2409", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "name": "Mistral: Pixtral 12B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "permaslug": "mistralai/pixtral-12b", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Pixtral 12B", + "slug": "mistralai/pixtral-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "model_variant_slug": "meta-llama/llama-4-maverick", + "model_variant_permaslug": "mistralai/pixtral-12b", + "model_variant_slug": "mistralai/pixtral-12b", "moderation_required": false, - "name": "Groq | meta-llama/llama-4-maverick-17b-128e-instruct", + "name": "Hyperbolic | mistralai/pixtral-12b", "pricing": { - "completion": "0.0000006", + "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "meta-llama/llama-4-maverick-17b-128e-instruct", - "provider_name": "Groq", + "provider_model_id": "mistralai/Pixtral-12B-2409", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", - "quantization": "unknown", + "provider_slug": "hyperbolic/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format", - "tools", - "tool_choice" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "mistralai/Pixtral-12B-2409", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "name": "Mistral: Pixtral 12B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "permaslug": "mistralai/pixtral-12b", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Pixtral 12B", + "slug": "mistralai/pixtral-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2025-04-05T19:31:59.735804+00:00", + "author": "qwen", + "context_length": 32768, + "created_at": "2024-08-28T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, - "context_length": 131072, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { @@ -80014,8 +80292,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "c65a0343-4d7c-4c34-b665-2bddd8cb8431", + "has_completions": true, + "id": "fae5bc3c-f799-4657-8d05-3cf6f489ed0c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -80024,153 +80302,137 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 10000000, - "created_at": "2025-04-05T19:31:59.735804+00:00", + "author": "qwen", + "context_length": 32768, + "created_at": "2024-08-28T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "features": {}, - "group": "Llama4", + "group": "Qwen", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "Qwen/Qwen2.5-VL-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "name": "Qwen: Qwen2.5-VL 7B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "permaslug": "qwen/qwen-2-vl-7b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Qwen2.5-VL 7B Instruct", + "slug": "qwen/qwen-2.5-vl-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "model_variant_slug": "meta-llama/llama-4-scout", + "model_variant_permaslug": "qwen/qwen-2-vl-7b-instruct", + "model_variant_slug": "qwen/qwen-2.5-vl-7b-instruct", "moderation_required": false, - "name": "Groq | meta-llama/llama-4-scout-17b-16e-instruct", + "name": "Hyperbolic | qwen/qwen-2-vl-7b-instruct", "pricing": { - "completion": "0.00000034", + "completion": "0.0000002", "discount": 0, - "image": "0.00036762", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000011", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "meta-llama/llama-4-scout-17b-16e-instruct", - "provider_name": "Groq", + "provider_model_id": "Qwen/Qwen2.5-VL-7B-Instruct", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", - "quantization": "unknown", + "provider_slug": "hyperbolic/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format", - "tools", - "tool_choice" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama4", + "group": "Qwen", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "Qwen/Qwen2.5-VL-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "name": "Qwen: Qwen2.5-VL 7B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "permaslug": "qwen/qwen-2-vl-7b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Qwen2.5-VL 7B Instruct", + "slug": "qwen/qwen-2.5-vl-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2025-04-30T01:06:33.531556+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, - "context_length": 131072, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -80179,8 +80441,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "d058b005-6392-46b7-a1c6-df20e57550ba", + "has_completions": true, + "id": "9f410d50-030e-47bd-aba8-261035cd01e2", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -80189,104 +80451,101 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 1024, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 163840, - "created_at": "2025-04-30T01:06:33.531556+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", - "features": {}, - "group": "Other", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-4-12B", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama Guard 4 12B", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-4-12b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama Guard 4 12B", - "slug": "meta-llama/llama-guard-4-12b", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-guard-4-12b", - "model_variant_slug": "meta-llama/llama-guard-4-12b", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "Groq | meta-llama/llama-guard-4-12b", + "name": "Hyperbolic | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.0000002", + "completion": "0.00000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "meta-llama/llama-guard-4-12b", - "provider_name": "Groq", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", + "provider_slug": "hyperbolic", "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, @@ -80294,51 +80553,56 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Other", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-4-12B", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama Guard 4 12B", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-4-12b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama Guard 4 12B", - "slug": "meta-llama/llama-guard-4-12b", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "moonshotai", + "author": "qwen", "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, - "supports_implicit_caching": true, - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -80347,27 +80611,27 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "4b9124a0-ba41-46f5-8927-6e6a43a549af", + "has_completions": true, + "id": "ebee417d-5f49-40f5-b1b3-3fc5f932b80d", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 150, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "author": "qwen", + "context_length": 1048576, + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "features": { "reasoning_config": { "end_token": null, @@ -80375,103 +80639,87 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "model_variant_slug": "qwen/qwen3-coder", "moderation_required": false, - "name": "Groq | moonshotai/kimi-k2-0905", + "name": "Hyperbolic | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { - "completion": "0.000003", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000005", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "prompt": "0.000002" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "moonshotai/kimi-k2-instruct-0905", - "provider_name": "Groq", + "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", - "quantization": "unknown", + "provider_slug": "hyperbolic/fp8", + "quantization": "fp8", "supported_parameters": [ - "response_format", "max_tokens", "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "tools", - "tool_choice" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -80482,32 +80730,32 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:38:04.192907+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -80515,16 +80763,16 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, - "context_length": 131072, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { @@ -80534,13 +80782,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, + "literal_required": false, "type_function": true } }, "has_chat_completions": true, - "has_completions": false, - "id": "c25b1e3a-a24e-4259-ab3e-06d1e50fcf39", + "has_completions": true, + "id": "124b570e-1be6-4cd2-9684-34fade4a89fd", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -80549,13 +80797,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:38:04.192907+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -80563,7 +80811,7 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -80572,89 +80820,71 @@ "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b:exacto", - "model_variant_slug": "openai/gpt-oss-120b:exacto", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", "moderation_required": false, - "name": "Groq | openai/gpt-oss-120b:exacto", + "name": "Hyperbolic | qwen/qwen3-next-80b-a3b-thinking-2509", "pricing": { - "completion": "0.0000006", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-120b", - "provider_name": "Groq", + "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", - "quantization": "unknown", + "provider_slug": "hyperbolic/bf16", + "quantization": "bf16", "supported_parameters": [ "reasoning", "include_reasoning", @@ -80662,17 +80892,21 @@ "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format", + "logit_bias", + "top_k", + "min_p", + "repetition_penalty", "tools", - "tool_choice", - "structured_outputs" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "exacto" + "variant": "standard" }, "features": { "chat_template_config": {}, @@ -80682,55 +80916,49 @@ "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b (exacto)", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b (exacto)", - "slug": "openai/gpt-oss-120b", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "openai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "created_at": "2025-03-05T21:06:54.875499+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": {}, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -80739,8 +80967,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "60654a45-42cc-48ee-8b1d-47bd44d0ecb0", + "has_completions": true, + "id": "5bbd820f-0208-4fec-9bd4-384f3e5ddaa1", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -80749,115 +80977,87 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "created_at": "2025-03-05T21:06:54.875499+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "GPT", + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/QwQ-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwq", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: QwQ 32B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwq-32b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "QwQ 32B", + "slug": "qwen/qwq-32b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "qwen/qwq-32b", + "model_variant_slug": "qwen/qwq-32b", "moderation_required": false, - "name": "Groq | openai/gpt-oss-20b", + "name": "Hyperbolic | qwen/qwq-32b", "pricing": { - "completion": "0.0000003", + "completion": "0.00000025", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000000375", - "internal_reasoning": "0", - "prompt": "0.000000075", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-20b", - "provider_name": "Groq", + "provider_model_id": "Qwen/QwQ-32B", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", - "quantization": "unknown", + "provider_slug": "hyperbolic/bf16", + "quantization": "bf16", "supported_parameters": [ "reasoning", "include_reasoning", @@ -80865,76 +81065,68 @@ "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format", - "tools", - "tool_choice", - "structured_outputs" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "GPT", + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/QwQ-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwq", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: QwQ 32B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwq-32b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "QwQ 32B", + "slug": "qwen/qwq-32b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-10-29T15:47:16.557286+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "created_at": "2024-09-19T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).", + "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -80943,8 +81135,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "d83e34dd-4624-428d-8c3e-524be778e27f", + "has_completions": true, + "id": "0cfb6233-5061-4b1e-be3a-5b897343a176", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -80953,181 +81145,138 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 0, - "created_at": "2025-10-29T15:47:16.557286+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 131072, + "created_at": "2024-09-19T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust & safety labeling.\n\nLearn more about this model in OpenAI's gpt-oss-safeguard [user guide](https://cookbook.openai.com/articles/gpt-oss-safeguard-guide).", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "GPT", + "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-safeguard-20b", + "hf_slug": "Qwen/Qwen2.5-72B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-safeguard-20b", + "name": "Qwen2.5 72B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-safeguard-20b", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen-2.5-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "gpt-oss-safeguard-20b", - "slug": "openai/gpt-oss-safeguard-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen2.5 72B Instruct", + "slug": "qwen/qwen-2.5-72b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-safeguard-20b", - "model_variant_slug": "openai/gpt-oss-safeguard-20b", + "model_variant_permaslug": "qwen/qwen-2.5-72b-instruct", + "model_variant_slug": "qwen/qwen-2.5-72b-instruct", "moderation_required": false, - "name": "Groq | openai/gpt-oss-safeguard-20b", + "name": "Hyperbolic | qwen/qwen-2.5-72b-instruct", "pricing": { - "completion": "0.0000003", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000037", - "internal_reasoning": "0", - "prompt": "0.000000075", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-safeguard-20b", - "provider_name": "Groq", + "provider_model_id": "Qwen/Qwen2.5-72B-Instruct", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", - "quantization": "unknown", + "provider_slug": "hyperbolic/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format", - "tools", - "tool_choice" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "GPT", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-safeguard-20b", + "hf_slug": "Qwen/Qwen2.5-72B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-safeguard-20b", + "name": "Qwen2.5 72B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-safeguard-20b", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen-2.5-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "gpt-oss-safeguard-20b", - "slug": "openai/gpt-oss-safeguard-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen2.5 72B Instruct", + "slug": "qwen/qwen-2.5-72b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", + "context_length": 32768, + "created_at": "2024-11-11T23:40:00.276653+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significantly improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", "endpoint": { - "adapter_name": "GroqAdapter", - "can_abort": false, - "context_length": 131072, + "adapter_name": "HyperbolicAdapter", + "can_abort": true, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -81136,8 +81285,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "e74c0abb-6cef-4454-9bf1-d72b44194b6f", + "has_completions": true, + "id": "47cf1361-80b9-49fc-a636-14a6a1665346", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -81146,151 +81295,120 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 40960, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", + "context_length": 128000, + "created_at": "2024-11-11T23:40:00.276653+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen3", + "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significantly improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "Qwen/Qwen2.5-Coder-32B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "Qwen2.5 Coder 32B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "qwen/qwen-2.5-coder-32b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", + "short_name": "Qwen2.5 Coder 32B Instruct", + "slug": "qwen/qwen-2.5-coder-32b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-32b-04-28", - "model_variant_slug": "qwen/qwen3-32b", + "model_variant_permaslug": "qwen/qwen-2.5-coder-32b-instruct", + "model_variant_slug": "qwen/qwen-2.5-coder-32b-instruct", "moderation_required": false, - "name": "Groq | qwen/qwen3-32b-04-28", + "name": "Hyperbolic | qwen/qwen-2.5-coder-32b-instruct", "pricing": { - "completion": "0.00000059", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000029", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, - "provider_display_name": "Groq", + "provider_display_name": "Hyperbolic", "provider_info": { - "adapterName": "GroqAdapter", - "baseUrl": "https://api.groq.com/openai/v1", + "adapterName": "HyperbolicAdapter", + "baseUrl": "https://api.hyperbolic.xyz/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://groq.com/privacy-policy/", + "privacyPolicyURL": "https://hyperbolic.xyz/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://groq.com/terms-of-use/", + "termsOfServiceURL": "https://hyperbolic.xyz/terms", "training": false }, - "displayName": "Groq", + "displayName": "Hyperbolic", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://groq.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" }, - "ignoredProviderModels": [ - "groq/compound-mini", - "groq/compound", - "playai-tts-arabic", - "playai-tts", - "whisper-large-v3-turbo", - "meta-llama/llama-prompt-guard-2-22m", - "whisper-large-v3", - "allam-2-7b", - "meta-llama/llama-prompt-guard-2-86m", - "moonshotai/kimi-k2-instruct", - "canopylabs/orpheus-v1-english", - "canopylabs/orpheus-arabic-saudi" - ], - "isAbortable": false, + "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Groq", - "owners": ["org_32nX3oZexbk6tkra3WMyrbfMdcg"], - "slug": "groq", - "statusPageUrl": "https://status.groq.com/" + "name": "Hyperbolic", + "owners": ["{}"], + "slug": "hyperbolic", + "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-32b", - "provider_name": "Groq", + "provider_model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", + "provider_name": "Hyperbolic", "provider_region": null, - "provider_slug": "groq", - "quantization": "unknown", + "provider_slug": "hyperbolic/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "stop", + "frequency_penalty", + "presence_penalty", "seed", - "response_format" + "logit_bias", + "top_k", + "min_p", + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen3", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "Qwen/Qwen2.5-Coder-32B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "Qwen2.5 Coder 32B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "qwen/qwen-2.5-coder-32b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", + "short_name": "Qwen2.5 Coder 32B Instruct", + "slug": "qwen/qwen-2.5-coder-32b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null } ], - "name": "Groq", - "slug": "groq" + "name": "Hyperbolic", + "slug": "hyperbolic" }, { "dataPolicy": { @@ -81298,29 +81416,33 @@ "retainsPrompts": false, "training": false }, - "displayName": "Hyperbolic", - "headquarters": "US", + "displayName": "Inception", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptionlabs.ai/&size=256" }, "models": [ { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-03-24T13:59:15.252028+00:00", - "default_parameters": {}, + "author": "inception", + "context_length": 128000, + "created_at": "2025-06-26T21:23:46+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "Mercury is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like GPT-4.1 Nano and Claude 3.5 Haiku while matching their performance. Mercury's speed enables developers to provide responsive user experiences, including with voice agents, search interfaces, and chatbots. Read more in the [blog post]\n(https://www.inceptionlabs.ai/blog/introducing-mercury) here. ", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "InceptionAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptionlabs.ai/terms#privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptionlabs.ai/terms", "training": false }, "features": { @@ -81333,8 +81455,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "a79e5b0c-4067-4388-ac6f-b4d794d43201", + "has_completions": false, + "id": "da1082ea-721e-460e-b330-034ae7aefc7f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -81343,147 +81465,168 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-03-24T13:59:15.252028+00:00", - "default_parameters": {}, + "author": "inception", + "context_length": 128000, + "created_at": "2025-06-26T21:23:46+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", - "features": {}, - "group": "DeepSeek", + "description": "Mercury is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like GPT-4.1 Nano and Claude 3.5 Haiku while matching their performance. Mercury's speed enables developers to provide responsive user experiences, including with voice agents, search interfaces, and chatbots. Read more in the [blog post]\n(https://www.inceptionlabs.ai/blog/introducing-mercury) here. ", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "model_version_group_id": null, + "name": "Inception: Mercury", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", - "reasoning_config": null, + "permaslug": "inception/mercury", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Mercury", + "slug": "inception/mercury", + "updated_at": "2025-11-29T06:32:57.821746+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", - "model_variant_slug": "deepseek/deepseek-chat-v3-0324", + "model_variant_permaslug": "inception/mercury", + "model_variant_slug": "inception/mercury", "moderation_required": false, - "name": "Hyperbolic | deepseek/deepseek-chat-v3-0324", + "name": "Inception | inception/mercury", "pricing": { - "completion": "0.00000125", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Inception", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InceptionAdapter", + "baseUrl": "https://api.inceptionlabs.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptionlabs.ai/terms#privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptionlabs.ai/terms", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Inception", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptionlabs.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Inception", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "inception", "statusPageUrl": null }, - "provider_model_id": "deepseek-ai/DeepSeek-V3-0324", - "provider_name": "Hyperbolic", + "provider_model_id": "mercury", + "provider_name": "Inception", "provider_region": null, - "provider_slug": "hyperbolic/fp8", - "quantization": "fp8", + "provider_slug": "inception", + "quantization": "unknown", "supported_parameters": [ "max_tokens", - "temperature", - "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", - "logit_bias", - "top_k", - "min_p", - "repetition_penalty" + "stop", + "temperature", + "top_p", + "tools", + "tool_choice", + "response_format", + "structured_outputs", + "top_k" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "DeepSeek", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "model_version_group_id": null, + "name": "Inception: Mercury", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", - "reasoning_config": null, + "permaslug": "inception/mercury", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Mercury", + "slug": "inception/mercury", + "updated_at": "2025-11-29T06:32:57.821746+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", + "author": "inception", + "context_length": 128000, + "created_at": "2025-04-30T17:24:40+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, + "temperature": 0, "top_p": null }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "description": "Mercury Coder is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like Claude 3.5 Haiku and GPT-4o Mini while matching their performance. Mercury Coder's speed means that developers can stay in the flow while coding, enjoying rapid chat-based iteration and responsive code completion suggestions. On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality. Read more in the [blog post here](https://www.inceptionlabs.ai/blog/introducing-mercury).", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "InceptionAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptionlabs.ai/terms#privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptionlabs.ai/terms", "training": false }, "features": { - "is_mandatory_reasoning": true, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -81492,8 +81635,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "5877b9a4-458b-44b5-85b4-a772f0363720", + "has_completions": false, + "id": "eb51fb37-11b0-42d1-924a-c25fa2375569", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -81502,118 +81645,109 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", + "author": "inception", + "context_length": 128000, + "created_at": "2025-04-30T17:24:40+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, + "temperature": 0, "top_p": null }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "description": "Mercury Coder is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like Claude 3.5 Haiku and GPT-4o Mini while matching their performance. Mercury Coder's speed means that developers can stay in the flow while coding, enjoying rapid chat-based iteration and responsive code completion suggestions. On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality. Read more in the [blog post here](https://www.inceptionlabs.ai/blog/introducing-mercury).", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "name": "Inception: Mercury Coder", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", + "permaslug": "inception/mercury-coder-small-beta", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Mercury Coder", + "slug": "inception/mercury-coder", + "updated_at": "2025-11-29T06:33:04.739524+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-0528", - "model_variant_slug": "deepseek/deepseek-r1-0528", + "model_variant_permaslug": "inception/mercury-coder-small-beta", + "model_variant_slug": "inception/mercury-coder", "moderation_required": false, - "name": "Hyperbolic | deepseek/deepseek-r1-0528", + "name": "Inception | inception/mercury-coder-small-beta", "pricing": { - "completion": "0.000003", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Inception", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InceptionAdapter", + "baseUrl": "https://api.inceptionlabs.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptionlabs.ai/terms#privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptionlabs.ai/terms", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Inception", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptionlabs.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Inception", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "inception", "statusPageUrl": null }, - "provider_model_id": "deepseek-ai/DeepSeek-R1-0528", - "provider_name": "Hyperbolic", + "provider_model_id": "mercury-coder", + "provider_name": "Inception", "provider_region": null, - "provider_slug": "hyperbolic", + "provider_slug": "inception", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", - "temperature", - "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", - "logit_bias", - "top_k", - "min_p", - "repetition_penalty", + "stop", + "temperature", + "top_p", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs", + "top_k" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -81621,54 +81755,69 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "name": "Inception: Mercury Coder", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", + "permaslug": "inception/mercury-coder-small-beta", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Mercury Coder", + "slug": "inception/mercury-coder", + "updated_at": "2025-11-29T06:33:04.739524+00:00", "warning_message": null - }, + } + ], + "name": "Inception", + "slug": "inception" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "datacenters": [], + "displayName": "Inceptron", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptron.io&size=256" + }, + "models": [ { "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { - "adapter_name": "HyperbolicAdapter", - "can_abort": true, - "context_length": 8192, + "adapter_name": "InceptronAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptron.io/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptron.io/termsofservice", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -81677,8 +81826,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "1eeba6ab-b98e-4d38-ad83-2b7b153e6e66", + "has_completions": false, + "id": "3a279bc5-dc8a-4c77-8167-1dbcebc60afd", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -81687,152 +81836,148 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-70B-Instruct", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "llama3", "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3 70B Instruct", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-70b-instruct", + "permaslug": "meta-llama/llama-3.3-70b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 70B Instruct", - "slug": "meta-llama/llama-3-70b-instruct", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3-70b-instruct", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "Hyperbolic | meta-llama/llama-3-70b-instruct", + "name": "Inceptron | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.0000004", + "completion": "0.00000038", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000012" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Inceptron", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InceptronAdapter", + "baseUrl": "https://api.inceptron.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptron.io/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptron.io/termsofservice", "training": false }, - "displayName": "Hyperbolic", - "editors": ["{}"], + "displayName": "Inceptron", + "editors": [], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptron.io&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], - "isAbortable": true, - "isMultipartSupported": true, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Hyperbolic", - "owners": ["{}"], - "slug": "hyperbolic", + "name": "Inceptron", + "owners": ["org_38eVjOI3LvUE8oMUJreSPcIggIs"], + "slug": "inceptron", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Meta-Llama-3-70B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "nvidia/llama-3.3-70b-instruct-fp8", + "provider_name": "Inceptron", "provider_region": null, - "provider_slug": "hyperbolic", - "quantization": "unknown", + "provider_slug": "inceptron/fp8", + "quantization": "fp8", "supported_parameters": [ - "max_tokens", "temperature", + "max_tokens", "top_p", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "logit_bias", - "top_k", - "min_p", - "repetition_penalty" + "tools", + "tool_choice", + "structured_outputs", + "response_format" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-70B-Instruct", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "llama3", "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3 70B Instruct", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-70b-instruct", + "permaslug": "meta-llama/llama-3.3-70b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 70B Instruct", - "slug": "meta-llama/llama-3-70b-instruct", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 32768, - "created_at": "2024-08-02T00:00:00+00:00", - "default_parameters": {}, + "author": "minimax", + "context_length": 196608, + "created_at": "2025-12-23T01:56:37+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.9 + }, "default_stops": [], - "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { - "adapter_name": "HyperbolicAdapter", - "can_abort": true, - "context_length": 32768, + "adapter_name": "InceptronAdapter", + "can_abort": false, + "context_length": 196608, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptron.io/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptron.io/termsofservice", "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true + "literal_none": false, + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, - "has_completions": true, - "id": "e72d3b56-eee0-4422-a1c8-3456ebdb105e", + "has_completions": false, + "id": "9f64833a-3a95-4a6c-9c72-91edc99e4978", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -81841,153 +81986,180 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 196608, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-08-02T00:00:00+00:00", - "default_parameters": {}, + "author": "minimax", + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.9 + }, "default_stops": [], - "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This is the base 405B pre-trained version.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/llama-3.1-405B", + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "none", - "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", - "name": "Meta: Llama 3.1 405B (base)", + "instruct_type": null, + "model_version_group_id": null, + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-405b", - "reasoning_config": null, + "permaslug": "minimax/minimax-m2.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 405B (base)", - "slug": "meta-llama/llama-3.1-405b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-405b", - "model_variant_slug": "meta-llama/llama-3.1-405b", + "model_variant_permaslug": "minimax/minimax-m2.1", + "model_variant_slug": "minimax/minimax-m2.1", "moderation_required": false, - "name": "Hyperbolic | meta-llama/llama-3.1-405b", + "name": "Inceptron | minimax/minimax-m2.1", "pricing": { - "completion": "0.000004", + "completion": "0.0000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Inceptron", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InceptronAdapter", + "baseUrl": "https://api.inceptron.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptron.io/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptron.io/termsofservice", "training": false }, - "displayName": "Hyperbolic", - "editors": ["{}"], + "displayName": "Inceptron", + "editors": [], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptron.io&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], - "isAbortable": true, - "isMultipartSupported": true, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Hyperbolic", - "owners": ["{}"], - "slug": "hyperbolic", + "name": "Inceptron", + "owners": ["org_38eVjOI3LvUE8oMUJreSPcIggIs"], + "slug": "inceptron", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Meta-Llama-3.1-405B", - "provider_name": "Hyperbolic", + "provider_model_id": "MiniMaxAI/MiniMax-M2.1", + "provider_name": "Inceptron", "provider_region": null, - "provider_slug": "hyperbolic/bf16", - "quantization": "bf16", + "provider_slug": "inceptron/fp8", + "quantization": "fp8", "supported_parameters": [ - "max_tokens", + "reasoning", + "include_reasoning", "temperature", + "max_tokens", "top_p", "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "logit_bias", - "top_k", - "min_p", - "repetition_penalty" + "tools", + "tool_choice", + "structured_outputs", + "response_format" ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/llama-3.1-405B", + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "none", - "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", - "name": "Meta: Llama 3.1 405B (base)", + "instruct_type": null, + "model_version_group_id": null, + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-405b", - "reasoning_config": null, + "permaslug": "minimax/minimax-m2.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 405B (base)", - "slug": "meta-llama/llama-3.1-405b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131000, - "created_at": "2024-07-23T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "z-ai", + "context_length": 202752, + "created_at": "2025-12-22T04:33:34.884504+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { - "adapter_name": "HyperbolicAdapter", - "can_abort": true, - "context_length": 131000, + "adapter_name": "InceptronAdapter", + "can_abort": false, + "context_length": 202752, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptron.io/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptron.io/termsofservice", "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": true, - "id": "d74f1663-e371-4337-b1c2-59fc3447e189", + "has_completions": false, + "id": "fc5618a8-4eb1-4fae-ba5e-e46e491eea6c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -81996,143 +82168,181 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 202752, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", - "name": "Meta: Llama 3.1 405B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-405b-instruct", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-20251222", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 405B Instruct", - "slug": "meta-llama/llama-3.1-405b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-405b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-405b-instruct", + "model_variant_permaslug": "z-ai/glm-4.7-20251222", + "model_variant_slug": "z-ai/glm-4.7", "moderation_required": false, - "name": "Hyperbolic | meta-llama/llama-3.1-405b-instruct", + "name": "Inceptron | z-ai/glm-4.7-20251222", "pricing": { - "completion": "0.000004", + "completion": "0.0000019", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Inceptron", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InceptronAdapter", + "baseUrl": "https://api.inceptron.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://www.inceptron.io/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://www.inceptron.io/termsofservice", "training": false }, - "displayName": "Hyperbolic", - "editors": ["{}"], + "displayName": "Inceptron", + "editors": [], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptron.io&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], - "isAbortable": true, - "isMultipartSupported": true, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Hyperbolic", - "owners": ["{}"], - "slug": "hyperbolic", + "name": "Inceptron", + "owners": ["org_38eVjOI3LvUE8oMUJreSPcIggIs"], + "slug": "inceptron", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Meta-Llama-3.1-405B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "zai-org/GLM-4.7-FP8", + "provider_name": "Inceptron", "provider_region": null, - "provider_slug": "hyperbolic/bf16", - "quantization": "bf16", + "provider_slug": "inceptron/fp8", + "quantization": "fp8", "supported_parameters": [ - "max_tokens", + "reasoning", + "include_reasoning", "temperature", + "max_tokens", "top_p", "stop", "frequency_penalty", "presence_penalty", "seed", - "logit_bias", - "top_k", - "min_p", - "repetition_penalty" + "reasoning_effort", + "logprobs", + "top_logprobs", + "parallel_tool_calls", + "tool_choice", + "tools" ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", - "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", - "name": "Meta: Llama 3.1 405B Instruct", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "zai-org/GLM-4.7", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-405b-instruct", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-20251222", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 405B Instruct", - "slug": "meta-llama/llama-3.1-405b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null - }, + } + ], + "name": "Inceptron", + "slug": "inceptron" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Infermatic", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://infermatic.ai/&size=256" + }, + "models": [ { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "author": "sao10k", + "context_length": 16000, + "created_at": "2025-01-08T02:20:54.222148+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "InfermaticAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 16000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -82142,7 +82352,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c0841471-73f1-4511-b47f-ac2643802026", + "id": "5282475d-e762-4788-b9ea-de6578d81a57", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -82155,80 +82365,99 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "author": "sao10k", + "context_length": 16000, + "created_at": "2025-01-08T02:20:54.222148+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).", "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "hf_slug": "Sao10K/L3.1-70B-Hanami-x1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.1 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Sao10K: Llama 3.1 70B Hanami x1", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-70b-instruct", + "permaslug": "sao10k/l3.1-70b-hanami-x1", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 70B Instruct", - "slug": "meta-llama/llama-3.1-70b-instruct", + "short_name": "Llama 3.1 70B Hanami x1", + "slug": "sao10k/l3.1-70b-hanami-x1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-70b-instruct", + "model_variant_permaslug": "sao10k/l3.1-70b-hanami-x1", + "model_variant_slug": "sao10k/l3.1-70b-hanami-x1", "moderation_required": false, - "name": "Hyperbolic | meta-llama/llama-3.1-70b-instruct", + "name": "Infermatic | sao10k/l3.1-70b-hanami-x1", "pricing": { - "completion": "0.0000004", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.000003" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Infermatic", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InfermaticAdapter", + "baseUrl": "https://api.totalgpt.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Infermatic", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://infermatic.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": [ + "Qwen2.5-72B-Instruct-Turbo", + "NousResearch-DeepHermes-3-Mistral-24B-Preview", + "deepseek-ai-DeepSeek-R1-Distill-Llama-70B", + "Qwen2-72B-Instruct", + "Meta-Llama-Guard-2-8B", + "TheDrummer-Anubis-70B-v1-FP8-Dynamic", + "anthracite-org-magnum-v2-72b-FP8-Dynamic", + "TheDrummer-Valkyrie-49B-v1", + "TTS-hexgrad-Kokoro-82M", + "Qwen-Qwen3-235B-A22B-Thinking-2507", + "TheDrummer-Anubis-70B-v1.1-FP8-Dynamic", + "nvidia-Llama-3.1-Nemotron-70B-Instruct-HF", + "Strawberrylemonade-L3-70B-v1.1-FP8-Dynamic", + "Mixtral-8x7B-Instruct-v0.1", + "Sao10K-70B-L3.3-Cirrus-x1", + "TheDrummer-Fallen-Llama-3.3-R1-70B-v1", + "Doctor-Shotgun-L3.3-70B-Magnum-v4-SE", + "Qwen-Qwen3-30B-A3B", + "Midnight-Miqu-70B-v1.5", + "Sao10K-72B-Qwen2.5-Kunou-v1-FP8-Dynamic", + "intfloat-multilingual-e5-base", + "Llama-3.2-11B-Vision-Instruct", + "anthracite-org-magnum-v4-72b-FP8-Dynamic", + "Sao10K-L3.3-70B-Euryale-v2.3-FP8-Dynamic" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Infermatic", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "infermatic", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "Sao10K-L3.1-70B-Hanami-x1", + "provider_name": "Infermatic", "provider_region": null, - "provider_slug": "hyperbolic/fp8", - "quantization": "fp8", + "provider_slug": "infermatic/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", @@ -82236,11 +82465,11 @@ "stop", "frequency_penalty", "presence_penalty", - "seed", + "repetition_penalty", "logit_bias", "top_k", "min_p", - "repetition_penalty" + "seed" ], "supports_multipart": true, "supports_reasoning": false, @@ -82251,43 +82480,42 @@ "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "hf_slug": "Sao10K/L3.1-70B-Hanami-x1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.1 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Sao10K: Llama 3.1 70B Hanami x1", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-70b-instruct", + "permaslug": "sao10k/l3.1-70b-hanami-x1", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 70B Instruct", - "slug": "meta-llama/llama-3.1-70b-instruct", + "short_name": "Llama 3.1 70B Hanami x1", + "slug": "sao10k/l3.1-70b-hanami-x1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "author": "raifle", + "context_length": 16000, + "created_at": "2024-11-08T22:31:23.953049+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["USER:", ""], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "InfermaticAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 16000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -82297,7 +82525,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f8bced68-f2ab-4d93-808f-59f6ccc1dd7c", + "id": "65206957-c772-4743-93ff-45900a190ddd", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -82310,80 +82538,99 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "author": "raifle", + "context_length": 16000, + "created_at": "2024-11-08T22:31:23.953049+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["USER:", ""], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling", "features": {}, - "group": "Llama3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "rAIfle/SorcererLM-8x22b-bf16", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "instruct_type": "vicuna", + "model_version_group_id": null, + "name": "SorcererLM 8x22B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", + "permaslug": "raifle/sorcererlm-8x22b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "SorcererLM 8x22B", + "slug": "raifle/sorcererlm-8x22b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_permaslug": "raifle/sorcererlm-8x22b", + "model_variant_slug": "raifle/sorcererlm-8x22b", "moderation_required": false, - "name": "Hyperbolic | meta-llama/llama-3.1-8b-instruct", + "name": "Infermatic | raifle/sorcererlm-8x22b", "pricing": { - "completion": "0.0000001", + "completion": "0.0000045", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000045" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Infermatic", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InfermaticAdapter", + "baseUrl": "https://api.totalgpt.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Infermatic", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://infermatic.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": [ + "Qwen2.5-72B-Instruct-Turbo", + "NousResearch-DeepHermes-3-Mistral-24B-Preview", + "deepseek-ai-DeepSeek-R1-Distill-Llama-70B", + "Qwen2-72B-Instruct", + "Meta-Llama-Guard-2-8B", + "TheDrummer-Anubis-70B-v1-FP8-Dynamic", + "anthracite-org-magnum-v2-72b-FP8-Dynamic", + "TheDrummer-Valkyrie-49B-v1", + "TTS-hexgrad-Kokoro-82M", + "Qwen-Qwen3-235B-A22B-Thinking-2507", + "TheDrummer-Anubis-70B-v1.1-FP8-Dynamic", + "nvidia-Llama-3.1-Nemotron-70B-Instruct-HF", + "Strawberrylemonade-L3-70B-v1.1-FP8-Dynamic", + "Mixtral-8x7B-Instruct-v0.1", + "Sao10K-70B-L3.3-Cirrus-x1", + "TheDrummer-Fallen-Llama-3.3-R1-70B-v1", + "Doctor-Shotgun-L3.3-70B-Magnum-v4-SE", + "Qwen-Qwen3-30B-A3B", + "Midnight-Miqu-70B-v1.5", + "Sao10K-72B-Qwen2.5-Kunou-v1-FP8-Dynamic", + "intfloat-multilingual-e5-base", + "Llama-3.2-11B-Vision-Instruct", + "anthracite-org-magnum-v4-72b-FP8-Dynamic", + "Sao10K-L3.3-70B-Euryale-v2.3-FP8-Dynamic" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Infermatic", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "infermatic", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "rAIfle-SorcererLM-8x22b-bf16", + "provider_name": "Infermatic", "provider_region": null, - "provider_slug": "hyperbolic/fp8", - "quantization": "fp8", + "provider_slug": "infermatic", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -82391,11 +82638,11 @@ "stop", "frequency_penalty", "presence_penalty", - "seed", + "repetition_penalty", "logit_bias", "top_k", "min_p", - "repetition_penalty" + "seed" ], "supports_multipart": true, "supports_reasoning": false, @@ -82404,41 +82651,41 @@ "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "rAIfle/SorcererLM-8x22b-bf16", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "instruct_type": "vicuna", + "model_version_group_id": null, + "name": "SorcererLM 8x22B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", + "permaslug": "raifle/sorcererlm-8x22b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "SorcererLM 8x22B", + "slug": "raifle/sorcererlm-8x22b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", + "author": "thedrummer", + "context_length": 32768, + "created_at": "2024-09-30T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "InfermaticAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", "training": false }, "features": { @@ -82451,7 +82698,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9a1f3f30-cc71-43ce-b113-ab824c0e2332", + "id": "caf29916-6adb-45f8-b895-dd863415d3ed", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -82464,80 +82711,99 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", + "author": "thedrummer", + "context_length": 32768, + "created_at": "2024-09-30T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", "features": {}, - "group": "Llama3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_slug": "TheDrummer/Rocinante-12B-v1.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Meta: Llama 3.2 3B Instruct", + "name": "TheDrummer: Rocinante 12B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-3b-instruct", + "permaslug": "thedrummer/rocinante-12b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.2 3B Instruct", - "slug": "meta-llama/llama-3.2-3b-instruct", + "short_name": "Rocinante 12B", + "slug": "thedrummer/rocinante-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.2-3b-instruct", - "model_variant_slug": "meta-llama/llama-3.2-3b-instruct", + "model_variant_permaslug": "thedrummer/rocinante-12b", + "model_variant_slug": "thedrummer/rocinante-12b", "moderation_required": false, - "name": "Hyperbolic | meta-llama/llama-3.2-3b-instruct", + "name": "Infermatic | thedrummer/rocinante-12b", "pricing": { - "completion": "0.0000001", + "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Infermatic", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InfermaticAdapter", + "baseUrl": "https://api.totalgpt.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Infermatic", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://infermatic.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": [ + "Qwen2.5-72B-Instruct-Turbo", + "NousResearch-DeepHermes-3-Mistral-24B-Preview", + "deepseek-ai-DeepSeek-R1-Distill-Llama-70B", + "Qwen2-72B-Instruct", + "Meta-Llama-Guard-2-8B", + "TheDrummer-Anubis-70B-v1-FP8-Dynamic", + "anthracite-org-magnum-v2-72b-FP8-Dynamic", + "TheDrummer-Valkyrie-49B-v1", + "TTS-hexgrad-Kokoro-82M", + "Qwen-Qwen3-235B-A22B-Thinking-2507", + "TheDrummer-Anubis-70B-v1.1-FP8-Dynamic", + "nvidia-Llama-3.1-Nemotron-70B-Instruct-HF", + "Strawberrylemonade-L3-70B-v1.1-FP8-Dynamic", + "Mixtral-8x7B-Instruct-v0.1", + "Sao10K-70B-L3.3-Cirrus-x1", + "TheDrummer-Fallen-Llama-3.3-R1-70B-v1", + "Doctor-Shotgun-L3.3-70B-Magnum-v4-SE", + "Qwen-Qwen3-30B-A3B", + "Midnight-Miqu-70B-v1.5", + "Sao10K-72B-Qwen2.5-Kunou-v1-FP8-Dynamic", + "intfloat-multilingual-e5-base", + "Llama-3.2-11B-Vision-Instruct", + "anthracite-org-magnum-v4-72b-FP8-Dynamic", + "Sao10K-L3.3-70B-Euryale-v2.3-FP8-Dynamic" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Infermatic", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "infermatic", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Llama-3.2-3B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "TheDrummer-Rocinante-12B-v1.1", + "provider_name": "Infermatic", "provider_region": null, - "provider_slug": "hyperbolic/fp8", - "quantization": "fp8", + "provider_slug": "infermatic/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", @@ -82545,11 +82811,11 @@ "stop", "frequency_penalty", "presence_penalty", - "seed", + "repetition_penalty", "logit_bias", "top_k", "min_p", - "repetition_penalty" + "seed" ], "supports_multipart": true, "supports_reasoning": false, @@ -82558,41 +82824,56 @@ "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_slug": "TheDrummer/Rocinante-12B-v1.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Meta: Llama 3.2 3B Instruct", + "name": "TheDrummer: Rocinante 12B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-3b-instruct", + "permaslug": "thedrummer/rocinante-12b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.2 3B Instruct", - "slug": "meta-llama/llama-3.2-3b-instruct", + "short_name": "Rocinante 12B", + "slug": "thedrummer/rocinante-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, + } + ], + "name": "Infermatic", + "slug": "infermatic" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "displayName": "Inflection", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://inflection.ai/&size=256" + }, + "models": [ { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", + "author": "inflection", + "context_length": 8000, + "created_at": "2024-10-11T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\n\nPi has been trained to mirror your tone and style, if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.", "endpoint": { - "adapter_name": "HyperbolicAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "InflectionLegacyAdapter", + "can_abort": false, + "context_length": 8000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://inflection.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://developers.inflection.ai/tos", "training": false }, "features": { @@ -82604,8 +82885,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "3bb0202a-27ee-4f2d-8180-4de5c3157276", + "has_completions": false, + "id": "2dbf0c2a-b934-47dd-983d-0ad1d91a4838", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -82614,145 +82895,124 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 1024, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", + "author": "inflection", + "context_length": 8000, + "created_at": "2024-10-11T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\n\nPi has been trained to mirror your tone and style, if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.", "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Inflection: Inflection 3 Pi", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", + "permaslug": "inflection/inflection-3-pi", "reasoning_config": null, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Inflection 3 Pi", + "slug": "inflection/inflection-3-pi", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "inflection/inflection-3-pi", + "model_variant_slug": "inflection/inflection-3-pi", "moderation_required": false, - "name": "Hyperbolic | meta-llama/llama-3.3-70b-instruct", + "name": "Inflection | inflection/inflection-3-pi", "pricing": { - "completion": "0.0000004", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000025" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Inflection", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InflectionLegacyAdapter", + "baseUrl": "https://layercake.pubwestus3.inf7ks8.com/external/api/inference", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://inflection.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://developers.inflection.ai/tos", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Inflection", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://inflection.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], - "isAbortable": true, - "isMultipartSupported": true, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Inflection", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "inflection", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "inflection_3_pi", + "provider_name": "Inflection", "provider_region": null, - "provider_slug": "hyperbolic/fp8", - "quantization": "fp8", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "logit_bias", - "top_k", - "min_p", - "repetition_penalty" - ], - "supports_multipart": true, + "provider_slug": "inflection", + "quantization": "unknown", + "supported_parameters": ["max_tokens", "temperature", "top_p", "stop"], + "supports_multipart": false, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Inflection: Inflection 3 Pi", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", + "permaslug": "inflection/inflection-3-pi", "reasoning_config": null, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Inflection 3 Pi", + "slug": "inflection/inflection-3-pi", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2024-09-10T00:00:00+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "inflection", + "context_length": 8000, + "created_at": "2024-10-11T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", + "description": "Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news.\n\nFor emotional intelligence similar to Pi, see [Inflect 3 Pi](/inflection/inflection-3-pi)\n\nSee [Inflection's announcement](https://inflection.ai/blog/enterprise) for more details.", "endpoint": { - "adapter_name": "HyperbolicAdapter", - "can_abort": true, - "context_length": 32768, + "adapter_name": "InflectionLegacyAdapter", + "can_abort": false, + "context_length": 8000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://inflection.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://developers.inflection.ai/tos", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -82761,8 +83021,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "b550f7af-571a-45fd-b442-b3327afaf38c", + "has_completions": false, + "id": "32d4977a-e055-4d2b-a351-5fc09039e363", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -82771,128 +83031,124 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 1024, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 4096, - "created_at": "2024-09-10T00:00:00+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "inflection", + "context_length": 8000, + "created_at": "2024-10-11T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", + "description": "Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news.\n\nFor emotional intelligence similar to Pi, see [Inflect 3 Pi](/inflection/inflection-3-pi)\n\nSee [Inflection's announcement](https://inflection.ai/blog/enterprise) for more details.", "features": {}, - "group": "Mistral", + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Pixtral-12B-2409", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Pixtral 12B", + "name": "Inflection: Inflection 3 Productivity", "output_modalities": ["text"], - "permaslug": "mistralai/pixtral-12b", + "permaslug": "inflection/inflection-3-productivity", "reasoning_config": null, "router": null, - "short_name": "Pixtral 12B", - "slug": "mistralai/pixtral-12b", + "short_name": "Inflection 3 Productivity", + "slug": "inflection/inflection-3-productivity", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/pixtral-12b", - "model_variant_slug": "mistralai/pixtral-12b", + "model_variant_permaslug": "inflection/inflection-3-productivity", + "model_variant_slug": "inflection/inflection-3-productivity", "moderation_required": false, - "name": "Hyperbolic | mistralai/pixtral-12b", + "name": "Inflection | inflection/inflection-3-productivity", "pricing": { - "completion": "0.0000001", + "completion": "0.00001", "discount": 0, - "image": "0.0001445", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000025" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Inflection", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "InflectionLegacyAdapter", + "baseUrl": "https://layercake.pubwestus3.inf7ks8.com/external/api/inference", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://inflection.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://developers.inflection.ai/tos", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Inflection", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://inflection.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], - "isAbortable": true, - "isMultipartSupported": true, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Inflection", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "inflection", "statusPageUrl": null }, - "provider_model_id": "mistralai/Pixtral-12B-2409", - "provider_name": "Hyperbolic", + "provider_model_id": "inflection_3_productivity", + "provider_name": "Inflection", "provider_region": null, - "provider_slug": "hyperbolic/bf16", - "quantization": "bf16", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "logit_bias", - "top_k", - "min_p", - "repetition_penalty" - ], - "supports_multipart": true, + "provider_slug": "inflection", + "quantization": "unknown", + "supported_parameters": ["max_tokens", "temperature", "top_p", "stop"], + "supports_multipart": false, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Mistral", + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Pixtral-12B-2409", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Pixtral 12B", + "name": "Inflection: Inflection 3 Productivity", "output_modalities": ["text"], - "permaslug": "mistralai/pixtral-12b", + "permaslug": "inflection/inflection-3-productivity", "reasoning_config": null, "router": null, - "short_name": "Pixtral 12B", - "slug": "mistralai/pixtral-12b", + "short_name": "Inflection 3 Productivity", + "slug": "inflection/inflection-3-productivity", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, + } + ], + "name": "Inflection", + "slug": "inflection" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "displayName": "Liquid", + "icon": { + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.liquid.ai/&size=256" + }, + "models": [ { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "author": "liquid", + "context_length": 32768, + "created_at": "2025-10-20T14:34:49.795855+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -82900,23 +83156,19 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment. It sets a new standard in terms of quality, speed, and memory efficiency.", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -82926,7 +83178,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "58a0fa5b-ead3-4dcb-a15a-5c4b63d75434", + "id": "58fe225e-1cae-4b2c-b007-a39cc81be4ba", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -82939,9 +83191,9 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "author": "liquid", + "context_length": 32768, + "created_at": "2025-10-20T14:34:49.795855+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -82949,90 +83201,81 @@ }, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment. It sets a new standard in terms of quality, speed, and memory efficiency.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "LiquidAI/LFM2-2.6B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "LiquidAI: LFM2-2.6B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "liquid/lfm-2.2-6b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "LFM2-2.6B", + "slug": "liquid/lfm-2.2-6b", + "updated_at": "2026-01-20T17:33:36.087047+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "liquid/lfm-2.2-6b", + "model_variant_slug": "liquid/lfm-2.2-6b", "moderation_required": false, - "name": "Hyperbolic | openai/gpt-oss-20b", + "name": "Liquid | liquid/lfm-2.2-6b", "pricing": { - "completion": "0.00000004", + "completion": "0.00000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000001" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Liquid", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://router.liquid.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Liquid", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.liquid.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": ["lfm-40b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Liquid", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "liquid", "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-20b", - "provider_name": "Hyperbolic", + "provider_model_id": "lfm2-2.6b", + "provider_name": "Liquid", "provider_region": null, - "provider_slug": "hyperbolic", + "provider_slug": "liquid", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -83040,70 +83283,70 @@ "frequency_penalty", "presence_penalty", "seed", - "logit_bias", "top_k", "min_p", "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "LiquidAI/LFM2-2.6B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "LiquidAI: LFM2-2.6B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "liquid/lfm-2.2-6b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "LFM2-2.6B", + "slug": "liquid/lfm-2.2-6b", + "updated_at": "2026-01-20T17:33:36.087047+00:00", "warning_message": null }, { - "author": "qwen", + "author": "liquid", "context_length": 32768, - "created_at": "2025-02-01T11:45:11.997326+00:00", - "default_parameters": {}, + "created_at": "2025-10-20T14:36:24.431587+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "description": "LFM2-8B-A1B is an efficient on-device Mixture-of-Experts (MoE) model from Liquid AI’s LFM2 family, built for fast, high-quality inference on edge hardware. It uses 8.3B total parameters with only ~1.5B active per token, delivering strong performance while keeping compute and memory usage low—making it ideal for phones, tablets, and laptops.", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -83113,7 +83356,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "b1ac73a7-5b74-4738-b563-ebc99483987e", + "id": "4cb2b30a-145b-4916-bbd0-29e6568e1749", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -83126,79 +83369,81 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-02-01T11:45:11.997326+00:00", - "default_parameters": {}, + "author": "liquid", + "context_length": 8192, + "created_at": "2025-10-20T14:36:24.431587+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", - "features": {}, - "group": "Qwen", + "description": "LFM2-8B-A1B is an efficient on-device Mixture-of-Experts (MoE) model from Liquid AI’s LFM2 family, built for fast, high-quality inference on edge hardware. It uses 8.3B total parameters with only ~1.5B active per token, delivering strong performance while keeping compute and memory usage low—making it ideal for phones, tablets, and laptops.", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "LiquidAI/LFM2-8B-A1B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "LiquidAI: LFM2-8B-A1B", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", - "reasoning_config": null, + "permaslug": "liquid/lfm2-8b-a1b", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "LFM2-8B-A1B", + "slug": "liquid/lfm2-8b-a1b", + "updated_at": "2026-01-20T17:31:27.697209+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", - "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_permaslug": "liquid/lfm2-8b-a1b", + "model_variant_slug": "liquid/lfm2-8b-a1b", "moderation_required": false, - "name": "Hyperbolic | qwen/qwen2.5-vl-72b-instruct", + "name": "Liquid | liquid/lfm2-8b-a1b", "pricing": { - "completion": "0.0000006", + "completion": "0.00000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000001" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Liquid", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://router.liquid.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Liquid", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.liquid.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": ["lfm-40b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Liquid", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "liquid", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen2.5-VL-72B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "lfm2-8b-a1b", + "provider_name": "Liquid", "provider_region": null, - "provider_slug": "hyperbolic", + "provider_slug": "liquid", "quantization": "unknown", "supported_parameters": [ "max_tokens", @@ -83208,7 +83453,6 @@ "frequency_penalty", "presence_penalty", "seed", - "logit_bias", "top_k", "min_p", "repetition_penalty" @@ -83219,46 +83463,52 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Qwen", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "LiquidAI/LFM2-8B-A1B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "LiquidAI: LFM2-8B-A1B", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", - "reasoning_config": null, + "permaslug": "liquid/lfm2-8b-a1b", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "LFM2-8B-A1B", + "slug": "liquid/lfm2-8b-a1b", + "updated_at": "2026-01-20T17:31:27.697209+00:00", "warning_message": null }, { - "author": "qwen", + "author": "liquid", "context_length": 32768, - "created_at": "2024-08-28T00:00:00+00:00", - "default_parameters": {}, + "created_at": "2026-01-20T16:45:21.850791+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "LFM2.5-1.2B-Instruct is a compact, high-performance instruction-tuned model built for fast on-device AI. It delivers strong chat quality in a 1.2B parameter footprint, with efficient edge inference and broad runtime support.", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -83268,11 +83518,11 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "fae5bc3c-f799-4657-8d05-3cf6f489ed0c", + "id": "66d7c64f-4699-4a06-82ab-cba032872541", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, @@ -83281,80 +83531,82 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "liquid", "context_length": 32768, - "created_at": "2024-08-28T00:00:00+00:00", - "default_parameters": {}, + "created_at": "2026-01-20T16:45:21.850791+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5 VL 7B is a multimodal LLM from the Qwen Team with the following key enhancements:\n\n- SoTA understanding of images of various resolution & ratio: Qwen2.5-VL achieves state-of-the-art performance on visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, MTVQA, etc.\n\n- Understanding videos of 20min+: Qwen2.5-VL can understand videos over 20 minutes for high-quality video-based question answering, dialog, content creation, etc.\n\n- Agent that can operate your mobiles, robots, etc.: with the abilities of complex reasoning and decision making, Qwen2.5-VL can be integrated with devices like mobile phones, robots, etc., for automatic operation based on visual environment and text instructions.\n\n- Multilingual Support: to serve global users, besides English and Chinese, Qwen2.5-VL now supports the understanding of texts in different languages inside images, including most European languages, Japanese, Korean, Arabic, Vietnamese, etc.\n\nFor more details, see this [blog post](https://qwenlm.github.io/blog/qwen2-vl/) and [GitHub repo](https://github.com/QwenLM/Qwen2-VL).\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", - "features": {}, - "group": "Qwen", + "description": "LFM2.5-1.2B-Instruct is a compact, high-performance instruction-tuned model built for fast on-device AI. It delivers strong chat quality in a 1.2B parameter footprint, with efficient edge inference and broad runtime support.", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-7B-Instruct", + "hf_slug": "LiquidAI/LFM2.5-1.2B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5-VL 7B Instruct", + "name": "LiquidAI: LFM2.5-1.2B-Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2-vl-7b-instruct", - "reasoning_config": null, + "permaslug": "liquid/lfm-2.5-1.2b-instruct-20260120", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5-VL 7B Instruct", - "slug": "qwen/qwen-2.5-vl-7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "LFM2.5-1.2B-Instruct", + "slug": "liquid/lfm-2.5-1.2b-instruct", + "updated_at": "2026-01-20T17:31:20.872808+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2-vl-7b-instruct", - "model_variant_slug": "qwen/qwen-2.5-vl-7b-instruct", + "model_variant_permaslug": "liquid/lfm-2.5-1.2b-instruct-20260120:free", + "model_variant_slug": "liquid/lfm-2.5-1.2b-instruct:free", "moderation_required": false, - "name": "Hyperbolic | qwen/qwen-2-vl-7b-instruct", + "name": "Liquid | liquid/lfm-2.5-1.2b-instruct-20260120:free", "pricing": { - "completion": "0.0000002", + "completion": "0", "discount": 0, - "image": "0.0001445", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Liquid", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://router.liquid.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Liquid", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.liquid.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": ["lfm-40b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Liquid", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "liquid", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen2.5-VL-7B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "lfm2.5-1.2b-instruct", + "provider_name": "Liquid", "provider_region": null, - "provider_slug": "hyperbolic/bf16", - "quantization": "bf16", + "provider_slug": "liquid", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -83363,7 +83615,6 @@ "frequency_penalty", "presence_penalty", "seed", - "logit_bias", "top_k", "min_p", "repetition_penalty" @@ -83372,47 +83623,55 @@ "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, - "features": {}, - "group": "Qwen", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-7B-Instruct", + "hf_slug": "LiquidAI/LFM2.5-1.2B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5-VL 7B Instruct", + "name": "LiquidAI: LFM2.5-1.2B-Instruct (free)", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2-vl-7b-instruct", - "reasoning_config": null, + "permaslug": "liquid/lfm-2.5-1.2b-instruct-20260120", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5-VL 7B Instruct", - "slug": "qwen/qwen-2.5-vl-7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "LFM2.5-1.2B-Instruct (free)", + "slug": "liquid/lfm-2.5-1.2b-instruct", + "updated_at": "2026-01-20T17:31:20.872808+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "author": "liquid", + "context_length": 32768, + "created_at": "2026-01-20T16:45:27.038368+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "LFM2.5-1.2B-Thinking is a lightweight reasoning-focused model optimized for agentic tasks, data extraction, and RAG—while still running comfortably on edge devices. It supports long context (up to 32K tokens) and is designed to provide higher-quality “thinking” responses in a small 1.2B model.", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", "training": false }, "features": { + "reasoning_return_mechanism": "content-string", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -83422,11 +83681,11 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9f410d50-030e-47bd-aba8-261035cd01e2", + "id": "f3a0f8fa-826d-46eb-83a3-ba94b9a217d9", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, @@ -83435,91 +83694,93 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "author": "liquid", + "context_length": 32768, + "created_at": "2026-01-20T16:45:27.038368+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "LFM2.5-1.2B-Thinking is a lightweight reasoning-focused model optimized for agentic tasks, data extraction, and RAG—while still running comfortably on edge devices. It supports long context (up to 32K tokens) and is designed to provide higher-quality “thinking” responses in a small 1.2B model.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "LiquidAI/LFM2.5-1.2B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "LiquidAI: LFM2.5-1.2B-Thinking", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "liquid/lfm-2.5-1.2b-thinking-20260120", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "LFM2.5-1.2B-Thinking", + "slug": "liquid/lfm-2.5-1.2b-thinking", + "updated_at": "2026-01-20T17:31:17.106685+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "liquid/lfm-2.5-1.2b-thinking-20260120:free", + "model_variant_slug": "liquid/lfm-2.5-1.2b-thinking:free", "moderation_required": false, - "name": "Hyperbolic | qwen/qwen3-235b-a22b-07-25", + "name": "Liquid | liquid/lfm-2.5-1.2b-thinking-20260120:free", "pricing": { - "completion": "0.000002", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Liquid", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://router.liquid.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Liquid", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.liquid.ai/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": ["lfm-40b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Liquid", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "liquid", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "provider_name": "Hyperbolic", + "provider_model_id": "lfm2.5-1.2b-thinking", + "provider_name": "Liquid", "provider_region": null, - "provider_slug": "hyperbolic", + "provider_slug": "liquid", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -83527,67 +83788,85 @@ "frequency_penalty", "presence_penalty", "seed", - "logit_bias", "top_k", "min_p", "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "LiquidAI/LFM2.5-1.2B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "LiquidAI: LFM2.5-1.2B-Thinking (free)", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "liquid/lfm-2.5-1.2b-thinking-20260120", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "LFM2.5-1.2B-Thinking (free)", + "slug": "liquid/lfm-2.5-1.2b-thinking", + "updated_at": "2026-01-20T17:31:17.106685+00:00", "warning_message": null - }, + } + ], + "name": "Liquid", + "slug": "liquid" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Mancer", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" + }, + "models": [ { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-23T00:29:06+00:00", + "author": "alpindale", + "context_length": 6144, + "created_at": "2023-11-10T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["USER:", ""], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "MancerAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 6144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -83597,7 +83876,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "ebee417d-5f49-40f5-b1b3-3fc5f932b80d", + "id": "53185ad0-a0dc-4fad-82b2-bffade322302", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -83606,106 +83885,94 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 1024, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 1048576, - "created_at": "2025-07-23T00:29:06+00:00", + "author": "alpindale", + "context_length": 6144, + "created_at": "2023-11-10T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["USER:", ""], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "alpindale/goliath-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "airoboros", "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Goliath 120B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "alpindale/goliath-120b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Goliath 120B", + "slug": "alpindale/goliath-120b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "model_variant_slug": "qwen/qwen3-coder", + "model_variant_permaslug": "alpindale/goliath-120b", + "model_variant_slug": "alpindale/goliath-120b", "moderation_required": false, - "name": "Hyperbolic | qwen/qwen3-coder-480b-a35b-07-25", + "name": "Mancer 2 | alpindale/goliath-120b", "pricing": { - "completion": "0.000002", + "completion": "0.0000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000375" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Mancer", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "MancerAdapter", + "baseUrl": "https://neuro.mancer.tech/oai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Mancer", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Mancer 2", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "mancer", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "goliath-120b", + "provider_name": "Mancer 2", "provider_region": null, - "provider_slug": "hyperbolic/fp8", - "quantization": "fp8", + "provider_slug": "mancer/int4", + "quantization": "int4", "supported_parameters": [ + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", + "repetition_penalty", "logit_bias", "top_k", "min_p", - "repetition_penalty" + "seed", + "top_a", + "logprobs", + "top_logprobs" ], "supports_multipart": true, "supports_reasoning": false, @@ -83713,72 +83980,59 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "alpindale/goliath-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "airoboros", "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Goliath 120B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "alpindale/goliath-120b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Goliath 120B", + "slug": "alpindale/goliath-120b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:38:04.192907+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "anthracite-org", + "context_length": 16384, + "created_at": "2024-10-22T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", + "description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and Opus(https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "MancerAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 16384, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": {}, - "supports_input_audio": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, + "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "124b570e-1be6-4cd2-9684-34fade4a89fd", + "id": "7f8d0c76-0eab-4606-9f4c-d0b4414e2cf1", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -83787,173 +84041,144 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:38:04.192907+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "anthracite-org", + "context_length": 32768, + "created_at": "2024-10-22T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and Opus(https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": "anthracite-org/magnum-v4-72b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "Magnum v4 72B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "anthracite-org/magnum-v4-72b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Magnum v4 72B", + "slug": "anthracite-org/magnum-v4-72b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", + "model_variant_permaslug": "anthracite-org/magnum-v4-72b", + "model_variant_slug": "anthracite-org/magnum-v4-72b", "moderation_required": false, - "name": "Hyperbolic | qwen/qwen3-next-80b-a3b-thinking-2509", + "name": "Mancer 2 | anthracite-org/magnum-v4-72b", "pricing": { - "completion": "0.0000003", + "completion": "0.000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.000003" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Mancer", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "MancerAdapter", + "baseUrl": "https://neuro.mancer.tech/oai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Mancer", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Mancer 2", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "mancer", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking", - "provider_name": "Hyperbolic", + "provider_model_id": "magnum-72b-v4", + "provider_name": "Mancer 2", "provider_region": null, - "provider_slug": "hyperbolic/bf16", - "quantization": "bf16", + "provider_slug": "mancer/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", + "repetition_penalty", "logit_bias", "top_k", "min_p", - "repetition_penalty", - "tools", - "tool_choice" + "seed", + "top_a", + "logprobs", + "top_logprobs" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": "anthracite-org/magnum-v4-72b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "Magnum v4 72B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "anthracite-org/magnum-v4-72b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Magnum v4 72B", + "slug": "anthracite-org/magnum-v4-72b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-03-05T21:06:54.875499+00:00", + "author": "mancer", + "context_length": 8000, + "created_at": "2023-08-02T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["###", ""], "default_system": null, - "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", + "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "MancerAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 8000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -83963,7 +84188,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5bbd820f-0208-4fec-9bd4-384f3e5ddaa1", + "id": "c8fb1a1b-9f2c-4dc6-b2be-43b25f999502", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -83972,161 +84197,140 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-03-05T21:06:54.875499+00:00", + "author": "mancer", + "context_length": 8000, + "created_at": "2023-08-02T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["###", ""], "default_system": null, - "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen", + "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/QwQ-32B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwq", + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen: QwQ 32B", + "name": "Mancer: Weaver (alpha)", "output_modalities": ["text"], - "permaslug": "qwen/qwq-32b", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "mancer/weaver", + "reasoning_config": null, "router": null, - "short_name": "QwQ 32B", - "slug": "qwen/qwq-32b", + "short_name": "Weaver (alpha)", + "slug": "mancer/weaver", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwq-32b", - "model_variant_slug": "qwen/qwq-32b", + "model_variant_permaslug": "mancer/weaver", + "model_variant_slug": "mancer/weaver", "moderation_required": false, - "name": "Hyperbolic | qwen/qwq-32b", + "name": "Mancer 2 | mancer/weaver", "pricing": { - "completion": "0.0000004", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000075" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Mancer", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "MancerAdapter", + "baseUrl": "https://neuro.mancer.tech/oai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Mancer", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Mancer 2", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "mancer", "statusPageUrl": null }, - "provider_model_id": "Qwen/QwQ-32B", - "provider_name": "Hyperbolic", + "provider_model_id": "weaver-alpha", + "provider_name": "Mancer 2", "provider_region": null, - "provider_slug": "hyperbolic/bf16", - "quantization": "bf16", + "provider_slug": "mancer/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", + "repetition_penalty", "logit_bias", "top_k", "min_p", - "repetition_penalty" + "seed", + "top_a", + "logprobs", + "top_logprobs" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/QwQ-32B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwq", + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen: QwQ 32B", + "name": "Mancer: Weaver (alpha)", "output_modalities": ["text"], - "permaslug": "qwen/qwq-32b", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "mancer/weaver", + "reasoning_config": null, "router": null, - "short_name": "QwQ 32B", - "slug": "qwen/qwq-32b", + "short_name": "Weaver (alpha)", + "slug": "mancer/weaver", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2024-09-19T00:00:00+00:00", + "author": "gryphe", + "context_length": 8192, + "created_at": "2023-07-02T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "MancerAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -84136,7 +84340,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0cfb6233-5061-4b1e-be3a-5b897343a176", + "id": "2ffda66b-b09e-405f-9a32-fe06a4097943", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -84145,84 +84349,78 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2024-09-19T00:00:00+00:00", + "author": "gryphe", + "context_length": 4096, + "created_at": "2023-07-02T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", "features": {}, - "group": "Qwen", + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-72B-Instruct", + "hf_slug": "Gryphe/MythoMax-L2-13b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen2.5 72B Instruct", + "name": "MythoMax 13B", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-72b-instruct", + "permaslug": "gryphe/mythomax-l2-13b", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 72B Instruct", - "slug": "qwen/qwen-2.5-72b-instruct", + "short_name": "MythoMax 13B", + "slug": "gryphe/mythomax-l2-13b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2.5-72b-instruct", - "model_variant_slug": "qwen/qwen-2.5-72b-instruct", + "model_variant_permaslug": "gryphe/mythomax-l2-13b", + "model_variant_slug": "gryphe/mythomax-l2-13b", "moderation_required": false, - "name": "Hyperbolic | qwen/qwen-2.5-72b-instruct", + "name": "Mancer 2 | gryphe/mythomax-l2-13b", "pricing": { - "completion": "0.0000004", + "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Mancer", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "MancerAdapter", + "baseUrl": "https://neuro.mancer.tech/oai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Mancer", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Mancer 2", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "mancer", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen2.5-72B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "mythomax", + "provider_name": "Mancer 2", "provider_region": null, - "provider_slug": "hyperbolic/bf16", - "quantization": "bf16", + "provider_slug": "mancer/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -84230,11 +84428,15 @@ "stop", "frequency_penalty", "presence_penalty", - "seed", + "repetition_penalty", "logit_bias", "top_k", "min_p", - "repetition_penalty" + "seed", + "top_a", + "response_format", + "logprobs", + "top_logprobs" ], "supports_multipart": true, "supports_reasoning": false, @@ -84243,45 +84445,48 @@ "variant": "standard" }, "features": {}, - "group": "Qwen", + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-72B-Instruct", + "hf_slug": "Gryphe/MythoMax-L2-13b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen2.5 72B Instruct", + "name": "MythoMax 13B", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-72b-instruct", + "permaslug": "gryphe/mythomax-l2-13b", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 72B Instruct", - "slug": "qwen/qwen-2.5-72b-instruct", + "short_name": "MythoMax 13B", + "slug": "gryphe/mythomax-l2-13b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32768, - "created_at": "2024-11-11T23:40:00.276653+00:00", + "author": "undi95", + "context_length": 6144, + "created_at": "2023-07-22T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significantly improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", + "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge", "endpoint": { - "adapter_name": "HyperbolicAdapter", + "adapter_name": "MancerAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 6144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -84291,7 +84496,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "47cf1361-80b9-49fc-a636-14a6a1665346", + "id": "75f85fa5-1ee8-474a-b0ae-24ccc2bbb113", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -84300,96 +84505,94 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 128000, - "created_at": "2024-11-11T23:40:00.276653+00:00", + "author": "undi95", + "context_length": 4096, + "created_at": "2023-07-22T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5:\n\n- Significantly improvements in **code generation**, **code reasoning** and **code fixing**. \n- A more comprehensive foundation for real-world applications such as **Code Agents**. Not only enhancing coding capabilities but also maintaining its strengths in mathematics and general competencies.\n\nTo read more about its evaluation results, check out [Qwen 2.5 Coder's blog](https://qwenlm.github.io/blog/qwen2.5-coder-family/).", + "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge", "features": {}, - "group": "Qwen", + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-Coder-32B-Instruct", + "hf_slug": "Undi95/ReMM-SLERP-L2-13B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen2.5 Coder 32B Instruct", + "name": "ReMM SLERP 13B", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-coder-32b-instruct", + "permaslug": "undi95/remm-slerp-l2-13b", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 Coder 32B Instruct", - "slug": "qwen/qwen-2.5-coder-32b-instruct", + "short_name": "ReMM SLERP 13B", + "slug": "undi95/remm-slerp-l2-13b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2.5-coder-32b-instruct", - "model_variant_slug": "qwen/qwen-2.5-coder-32b-instruct", + "model_variant_permaslug": "undi95/remm-slerp-l2-13b", + "model_variant_slug": "undi95/remm-slerp-l2-13b", "moderation_required": false, - "name": "Hyperbolic | qwen/qwen-2.5-coder-32b-instruct", + "name": "Mancer 2 | undi95/remm-slerp-l2-13b", "pricing": { - "completion": "0.0000002", + "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, - "provider_display_name": "Hyperbolic", + "provider_display_name": "Mancer", "provider_info": { - "adapterName": "HyperbolicAdapter", - "baseUrl": "https://api.hyperbolic.xyz/v1", + "adapterName": "MancerAdapter", + "baseUrl": "https://neuro.mancer.tech/oai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://hyperbolic.xyz/privacy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://hyperbolic.xyz/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, - "displayName": "Hyperbolic", + "displayName": "Mancer", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://hyperbolic.xyz/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" }, - "ignoredProviderModels": ["Qwen/Qwen3-Coder-480B-A35B-Instruct"], + "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Hyperbolic", + "name": "Mancer 2", "owners": ["{}"], - "slug": "hyperbolic", + "slug": "mancer", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen2.5-Coder-32B-Instruct", - "provider_name": "Hyperbolic", + "provider_model_id": "remm-slerp", + "provider_name": "Mancer 2", "provider_region": null, - "provider_slug": "hyperbolic/fp8", + "provider_slug": "mancer/fp8", "quantization": "fp8", "supported_parameters": [ + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", + "repetition_penalty", "logit_bias", "top_k", "min_p", - "repetition_penalty" + "seed", + "top_a", + "logprobs", + "top_logprobs" ], "supports_multipart": true, "supports_reasoning": false, @@ -84398,65 +84601,53 @@ "variant": "standard" }, "features": {}, - "group": "Qwen", + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-Coder-32B-Instruct", + "hf_slug": "Undi95/ReMM-SLERP-L2-13B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen2.5 Coder 32B Instruct", + "name": "ReMM SLERP 13B", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-coder-32b-instruct", + "permaslug": "undi95/remm-slerp-l2-13b", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 Coder 32B Instruct", - "slug": "qwen/qwen-2.5-coder-32b-instruct", + "short_name": "ReMM SLERP 13B", + "slug": "undi95/remm-slerp-l2-13b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Hyperbolic", - "slug": "hyperbolic" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Inception", - "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptionlabs.ai/&size=256" - }, - "models": [ + }, { - "author": "inception", - "context_length": 128000, - "created_at": "2025-06-26T21:23:46+00:00", + "author": "z-ai", + "context_length": 131072, + "created_at": "2025-09-30T12:32:56.306946+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0, + "temperature": 0.6, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mercury is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like GPT-4.1 Nano and Claude 3.5 Haiku while matching their performance. Mercury's speed enables developers to provide responsive user experiences, including with voice agents, search interfaces, and chatbots. Read more in the [blog post]\n(https://www.inceptionlabs.ai/blog/introducing-mercury) here. ", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "MancerAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.inceptionlabs.ai/terms#privacy-policy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.inceptionlabs.ai/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": { + "response_format": true, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -84465,8 +84656,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "da1082ea-721e-460e-b330-034ae7aefc7f", + "has_completions": true, + "id": "ed92959c-5309-43f3-b897-8039cb7478af", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -84475,26 +84666,26 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "inception", - "context_length": 128000, - "created_at": "2025-06-26T21:23:46+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-09-30T12:32:56.306946+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0, + "temperature": 0.6, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mercury is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like GPT-4.1 Nano and Claude 3.5 Haiku while matching their performance. Mercury's speed enables developers to provide responsive user experiences, including with voice agents, search interfaces, and chatbots. Read more in the [blog post]\n(https://www.inceptionlabs.ai/blog/introducing-mercury) here. ", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, @@ -84506,92 +84697,93 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Inception: Mercury", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "inception/mercury", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Mercury", - "slug": "inception/mercury", - "updated_at": "2025-11-29T06:32:57.821746+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, - "model_variant_permaslug": "inception/mercury", - "model_variant_slug": "inception/mercury", + "model_variant_permaslug": "z-ai/glm-4.6", + "model_variant_slug": "z-ai/glm-4.6", "moderation_required": false, - "name": "Inception | inception/mercury", + "name": "Mancer 2 | z-ai/glm-4.6", "pricing": { - "completion": "0.000001", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0.00000045" }, - "provider_display_name": "Inception", + "provider_display_name": "Mancer", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.inceptionlabs.ai/v1", + "adapterName": "MancerAdapter", + "baseUrl": "https://neuro.mancer.tech/oai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.inceptionlabs.ai/terms#privacy-policy", + "privacyPolicyURL": "https://mancer.tech/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.inceptionlabs.ai/terms", + "termsOfServiceURL": "https://mancer.tech/terms", "training": false }, - "displayName": "Inception", + "displayName": "Mancer", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptionlabs.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Inception", + "name": "Mancer 2", "owners": ["{}"], - "slug": "inception", + "slug": "mancer", "statusPageUrl": null }, - "provider_model_id": "mercury", - "provider_name": "Inception", + "provider_model_id": "glm-4.6", + "provider_name": "Mancer 2", "provider_region": null, - "provider_slug": "inception", - "quantization": "unknown", + "provider_slug": "mancer/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", + "response_format", "max_tokens", - "frequency_penalty", - "presence_penalty", - "stop", "temperature", "top_p", - "tools", - "tool_choice", - "response_format", - "structured_outputs", - "top_k" - ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, + "stop", + "frequency_penalty", + "presence_penalty", + "repetition_penalty", + "logit_bias", + "top_k", + "min_p", + "seed", + "top_a", + "logprobs", + "top_logprobs", + "tools" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", "system_prompt": null } }, @@ -84603,45 +84795,78 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Inception: Mercury", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "inception/mercury", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Mercury", - "slug": "inception/mercury", - "updated_at": "2025-11-29T06:32:57.821746+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null - }, + } + ], + "name": "Mancer 2", + "slug": "mancer" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": true + }, + "datacenters": ["Global"], + "displayName": "Martian", + "headquarters": "Unknown", + "icon": { + "className": "rounded-sm", + "url": "https://placehold.co/100?text=Ma&font=roboto" + }, + "models": [], + "name": "Martian", + "slug": "martian" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "datacenters": ["US"], + "displayName": "MiniMax", + "headquarters": "SG", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" + }, + "models": [ { - "author": "inception", - "context_length": 128000, - "created_at": "2025-04-30T17:24:40+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0, - "top_p": null - }, + "author": "minimax", + "context_length": 1000000, + "created_at": "2025-06-17T22:46:54.257159+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mercury Coder is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like Claude 3.5 Haiku and GPT-4o Mini while matching their performance. Mercury Coder's speed means that developers can stay in the flow while coding, enjoying rapid chat-based iteration and responsive code completion suggestions. On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality. Read more in the [blog post here](https://www.inceptionlabs.ai/blog/introducing-mercury).", + "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": true, - "context_length": 128000, + "adapter_name": "MinimaxAdapter", + "can_abort": false, + "context_length": 1000000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.inceptionlabs.ai/terms#privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.inceptionlabs.ai/terms", + "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -84651,7 +84876,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "eb51fb37-11b0-42d1-924a-c25fa2375569", + "id": "dd5bc8d5-68a7-48a7-a680-db37579d5c14", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -84660,27 +84885,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 40000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "inception", - "context_length": 128000, - "created_at": "2025-04-30T17:24:40+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0, - "top_p": null - }, + "author": "minimax", + "context_length": 1000000, + "created_at": "2025-06-17T22:46:54.257159+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mercury Coder is the first diffusion large language model (dLLM). Applying a breakthrough discrete diffusion approach, the model runs 5-10x faster than even speed optimized models like Claude 3.5 Haiku and GPT-4o Mini while matching their performance. Mercury Coder's speed means that developers can stay in the flow while coding, enjoying rapid chat-based iteration and responsive code completion suggestions. On Copilot Arena, Mercury Coder ranks 1st in speed and ties for 2nd in quality. Read more in the [blog post here](https://www.inceptionlabs.ai/blog/introducing-mercury).", + "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, "group": "Other", @@ -84691,93 +84910,86 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Inception: Mercury Coder", + "name": "MiniMax: MiniMax M1", "output_modalities": ["text"], - "permaslug": "inception/mercury-coder-small-beta", + "permaslug": "minimax/minimax-m1", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Mercury Coder", - "slug": "inception/mercury-coder", - "updated_at": "2025-11-29T06:33:04.739524+00:00", + "short_name": "MiniMax M1", + "slug": "minimax/minimax-m1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "inception/mercury-coder-small-beta", - "model_variant_slug": "inception/mercury-coder", + "model_variant_permaslug": "minimax/minimax-m1", + "model_variant_slug": "minimax/minimax-m1", "moderation_required": false, - "name": "Inception | inception/mercury-coder-small-beta", + "name": "Minimax | minimax/minimax-m1", "pricing": { - "completion": "0.000001", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, - "provider_display_name": "Inception", + "provider_display_name": "MiniMax", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.inceptionlabs.ai/v1", + "adapterName": "MinimaxAdapter", + "baseUrl": "https://api.minimaxi.chat/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.inceptionlabs.ai/terms#privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.inceptionlabs.ai/terms", + "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", "training": false }, - "displayName": "Inception", + "displayName": "MiniMax", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, + "headquarters": "SG", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.inceptionlabs.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" }, "ignoredProviderModels": [], - "isAbortable": true, + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Inception", + "name": "Minimax", "owners": ["{}"], - "slug": "inception", + "slug": "minimax", "statusPageUrl": null }, - "provider_model_id": "mercury-coder", - "provider_name": "Inception", + "provider_model_id": "MiniMax-M1", + "provider_name": "Minimax", "provider_region": null, - "provider_slug": "inception", + "provider_slug": "minimax", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", - "frequency_penalty", - "presence_penalty", - "stop", "temperature", - "top_p", - "tools", - "tool_choice", - "response_format", - "structured_outputs", - "top_k" + "top_p" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], + "supports_reasoning": true, + "supports_tool_parameters": false, + "variable_pricings": [ + { + "completions": "0.0000022", + "prompt": "0.0000013", + "threshold": 200000, + "type": "prompt-threshold" + } + ], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, "group": "Other", @@ -84788,55 +85000,47 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Inception: Mercury Coder", + "name": "MiniMax: MiniMax M1", "output_modalities": ["text"], - "permaslug": "inception/mercury-coder-small-beta", + "permaslug": "minimax/minimax-m1", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Mercury Coder", - "slug": "inception/mercury-coder", - "updated_at": "2025-11-29T06:33:04.739524+00:00", + "short_name": "MiniMax M1", + "slug": "minimax/minimax-m1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Inception", - "slug": "inception" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Infermatic", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://infermatic.ai/&size=256" - }, - "models": [ + }, { - "author": "sao10k", - "context_length": 16000, - "created_at": "2025-01-08T02:20:54.222148+00:00", - "default_parameters": {}, + "author": "minimax", + "context_length": 204800, + "created_at": "2025-10-23T20:41:33.120854+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).", + "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { - "adapter_name": "InfermaticAdapter", - "can_abort": true, - "context_length": 16000, + "adapter_name": "MinimaxAdapter", + "can_abort": false, + "context_length": 204800, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", + "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", + "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", "training": false }, "features": { + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "content-string", + "should_send_reasoning_text_in_text_content": false, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -84845,176 +85049,178 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "5282475d-e762-4788-b9ea-de6578d81a57", + "has_completions": false, + "id": "46585386-044e-4e91-852c-f56bf056e1af", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 500, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sao10k", - "context_length": 16000, - "created_at": "2025-01-08T02:20:54.222148+00:00", - "default_parameters": {}, + "author": "minimax", + "context_length": 204800, + "created_at": "2025-10-23T20:41:33.120854+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).", - "features": {}, - "group": "Llama3", + "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Sao10K/L3.1-70B-Hanami-x1", + "hf_slug": "MiniMaxAI/MiniMax-M2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3.1 70B Hanami x1", + "name": "MiniMax: MiniMax M2", "output_modalities": ["text"], - "permaslug": "sao10k/l3.1-70b-hanami-x1", - "reasoning_config": null, + "permaslug": "minimax/minimax-m2", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 70B Hanami x1", - "slug": "sao10k/l3.1-70b-hanami-x1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2", + "slug": "minimax/minimax-m2", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "sao10k/l3.1-70b-hanami-x1", - "model_variant_slug": "sao10k/l3.1-70b-hanami-x1", + "model_variant_permaslug": "minimax/minimax-m2", + "model_variant_slug": "minimax/minimax-m2", "moderation_required": false, - "name": "Infermatic | sao10k/l3.1-70b-hanami-x1", + "name": "Minimax | minimax/minimax-m2", "pricing": { - "completion": "0.000003", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "completion": "0.00000102", + "discount": 0.15, + "prompt": "0.000000255" }, - "provider_display_name": "Infermatic", + "provider_display_name": "MiniMax", "provider_info": { - "adapterName": "InfermaticAdapter", - "baseUrl": "https://api.totalgpt.ai/v1", + "adapterName": "MinimaxAdapter", + "baseUrl": "https://api.minimaxi.chat/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", - "retainsPrompts": false, - "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", + "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", "training": false }, - "displayName": "Infermatic", + "displayName": "MiniMax", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "SG", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://infermatic.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen2.5-72B-Instruct-Turbo", - "NousResearch-DeepHermes-3-Mistral-24B-Preview", - "deepseek-ai-DeepSeek-R1-Distill-Llama-70B", - "Qwen2-72B-Instruct", - "Meta-Llama-Guard-2-8B", - "TheDrummer-Anubis-70B-v1-FP8-Dynamic", - "anthracite-org-magnum-v2-72b-FP8-Dynamic", - "TheDrummer-Valkyrie-49B-v1", - "TTS-hexgrad-Kokoro-82M", - "Qwen-Qwen3-235B-A22B-Thinking-2507", - "TheDrummer-Anubis-70B-v1.1-FP8-Dynamic", - "nvidia-Llama-3.1-Nemotron-70B-Instruct-HF", - "Strawberrylemonade-L3-70B-v1.1-FP8-Dynamic", - "Mixtral-8x7B-Instruct-v0.1", - "Sao10K-70B-L3.3-Cirrus-x1", - "TheDrummer-Fallen-Llama-3.3-R1-70B-v1", - "Doctor-Shotgun-L3.3-70B-Magnum-v4-SE", - "Qwen-Qwen3-30B-A3B", - "Midnight-Miqu-70B-v1.5", - "Sao10K-72B-Qwen2.5-Kunou-v1-FP8-Dynamic", - "intfloat-multilingual-e5-base", - "Llama-3.2-11B-Vision-Instruct", - "anthracite-org-magnum-v4-72b-FP8-Dynamic", - "Sao10K-L3.3-70B-Euryale-v2.3-FP8-Dynamic" - ], - "isAbortable": true, + "ignoredProviderModels": [], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Infermatic", + "name": "Minimax", "owners": ["{}"], - "slug": "infermatic", + "slug": "minimax", "statusPageUrl": null }, - "provider_model_id": "Sao10K-L3.1-70B-Hanami-x1", - "provider_name": "Infermatic", + "provider_model_id": "MiniMax-M2-Stable", + "provider_name": "Minimax", "provider_region": null, - "provider_slug": "infermatic/bf16", - "quantization": "bf16", + "provider_slug": "minimax/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", - "seed" + "tool_choice", + "tools" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Sao10K/L3.1-70B-Hanami-x1", + "hf_slug": "MiniMaxAI/MiniMax-M2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3.1 70B Hanami x1", + "name": "MiniMax: MiniMax M2", "output_modalities": ["text"], - "permaslug": "sao10k/l3.1-70b-hanami-x1", - "reasoning_config": null, + "permaslug": "minimax/minimax-m2", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 70B Hanami x1", - "slug": "sao10k/l3.1-70b-hanami-x1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2", + "slug": "minimax/minimax-m2", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "raifle", - "context_length": 16000, - "created_at": "2024-11-08T22:31:23.953049+00:00", - "default_parameters": {}, - "default_stops": ["USER:", ""], + "author": "minimax", + "context_length": 65536, + "created_at": "2026-01-23T14:07:19.197529+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling", + "description": "MiniMax M2-her is a dialogue-first large language model built for immersive roleplay, character-driven chat, and expressive multi-turn conversations. Designed to stay consistent in tone and personality, it supports rich message roles (user_system, group, sample_message_user, sample_message_ai) and can learn from example dialogue to better match the style and pacing of your scenario, making it a strong choice for storytelling, companions, and conversational experiences where natural flow and vivid interaction matter most.", "endpoint": { - "adapter_name": "InfermaticAdapter", - "can_abort": true, - "context_length": 16000, + "adapter_name": "MinimaxChatCompletionV2Adapter", + "can_abort": false, + "context_length": 65536, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", - "retainsPrompts": false, - "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", + "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", "training": false }, "features": { + "supports_multipart": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -85023,8 +85229,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "65206957-c772-4743-93ff-45900a190ddd", + "has_completions": false, + "id": "38c32567-be85-4c82-a6d8-592a4158d436", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -85033,359 +85239,288 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "raifle", - "context_length": 16000, - "created_at": "2024-11-08T22:31:23.953049+00:00", - "default_parameters": {}, - "default_stops": ["USER:", ""], + "author": "minimax", + "context_length": 65536, + "created_at": "2026-01-23T14:07:19.197529+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "SorcererLM is an advanced RP and storytelling model, built as a Low-rank 16-bit LoRA fine-tuned on [WizardLM-2 8x22B](/microsoft/wizardlm-2-8x22b).\n\n- Advanced reasoning and emotional intelligence for engaging and immersive interactions\n- Vivid writing capabilities enriched with spatial and contextual awareness\n- Enhanced narrative depth, promoting creative and dynamic storytelling", - "features": {}, - "group": "Mistral", + "description": "MiniMax M2-her is a dialogue-first large language model built for immersive roleplay, character-driven chat, and expressive multi-turn conversations. Designed to stay consistent in tone and personality, it supports rich message roles (user_system, group, sample_message_user, sample_message_ai) and can learn from example dialogue to better match the style and pacing of your scenario, making it a strong choice for storytelling, companions, and conversational experiences where natural flow and vivid interaction matter most.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "rAIfle/SorcererLM-8x22b-bf16", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "vicuna", + "instruct_type": null, "model_version_group_id": null, - "name": "SorcererLM 8x22B", + "name": "MiniMax: MiniMax M2-her", "output_modalities": ["text"], - "permaslug": "raifle/sorcererlm-8x22b", - "reasoning_config": null, + "permaslug": "minimax/minimax-m2-her-20260123", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "SorcererLM 8x22B", - "slug": "raifle/sorcererlm-8x22b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2-her", + "slug": "minimax/minimax-m2-her", + "updated_at": "2026-01-27T13:03:37.499372+00:00", "warning_message": null }, - "model_variant_permaslug": "raifle/sorcererlm-8x22b", - "model_variant_slug": "raifle/sorcererlm-8x22b", + "model_variant_permaslug": "minimax/minimax-m2-her-20260123", + "model_variant_slug": "minimax/minimax-m2-her", "moderation_required": false, - "name": "Infermatic | raifle/sorcererlm-8x22b", + "name": "Minimax | minimax/minimax-m2-her-20260123", "pricing": { - "completion": "0.0000045", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000045", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000003", + "prompt": "0.0000003" }, - "provider_display_name": "Infermatic", + "provider_display_name": "MiniMax", "provider_info": { - "adapterName": "InfermaticAdapter", - "baseUrl": "https://api.totalgpt.ai/v1", + "adapterName": "MinimaxChatCompletionV2Adapter", + "baseUrl": "https://api.minimax.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", - "retainsPrompts": false, - "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", + "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", "training": false }, - "displayName": "Infermatic", + "displayName": "MiniMax", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "SG", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://infermatic.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen2.5-72B-Instruct-Turbo", - "NousResearch-DeepHermes-3-Mistral-24B-Preview", - "deepseek-ai-DeepSeek-R1-Distill-Llama-70B", - "Qwen2-72B-Instruct", - "Meta-Llama-Guard-2-8B", - "TheDrummer-Anubis-70B-v1-FP8-Dynamic", - "anthracite-org-magnum-v2-72b-FP8-Dynamic", - "TheDrummer-Valkyrie-49B-v1", - "TTS-hexgrad-Kokoro-82M", - "Qwen-Qwen3-235B-A22B-Thinking-2507", - "TheDrummer-Anubis-70B-v1.1-FP8-Dynamic", - "nvidia-Llama-3.1-Nemotron-70B-Instruct-HF", - "Strawberrylemonade-L3-70B-v1.1-FP8-Dynamic", - "Mixtral-8x7B-Instruct-v0.1", - "Sao10K-70B-L3.3-Cirrus-x1", - "TheDrummer-Fallen-Llama-3.3-R1-70B-v1", - "Doctor-Shotgun-L3.3-70B-Magnum-v4-SE", - "Qwen-Qwen3-30B-A3B", - "Midnight-Miqu-70B-v1.5", - "Sao10K-72B-Qwen2.5-Kunou-v1-FP8-Dynamic", - "intfloat-multilingual-e5-base", - "Llama-3.2-11B-Vision-Instruct", - "anthracite-org-magnum-v4-72b-FP8-Dynamic", - "Sao10K-L3.3-70B-Euryale-v2.3-FP8-Dynamic" - ], - "isAbortable": true, + "ignoredProviderModels": [], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Infermatic", + "name": "Minimax", "owners": ["{}"], - "slug": "infermatic", + "slug": "minimax", "statusPageUrl": null }, - "provider_model_id": "rAIfle-SorcererLM-8x22b-bf16", - "provider_name": "Infermatic", + "provider_model_id": "M2-her", + "provider_name": "Minimax", "provider_region": null, - "provider_slug": "infermatic", + "provider_slug": "minimax", "quantization": "unknown", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", - "seed" - ], - "supports_multipart": true, + "supported_parameters": ["max_tokens", "temperature", "top_p"], + "supports_multipart": false, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "rAIfle/SorcererLM-8x22b-bf16", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "vicuna", + "instruct_type": null, "model_version_group_id": null, - "name": "SorcererLM 8x22B", + "name": "MiniMax: MiniMax M2-her", "output_modalities": ["text"], - "permaslug": "raifle/sorcererlm-8x22b", - "reasoning_config": null, + "permaslug": "minimax/minimax-m2-her-20260123", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "SorcererLM 8x22B", - "slug": "raifle/sorcererlm-8x22b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2-her", + "slug": "minimax/minimax-m2-her", + "updated_at": "2026-01-27T13:03:37.499372+00:00", "warning_message": null }, { - "author": "thedrummer", - "context_length": 32768, - "created_at": "2024-09-30T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "Other", + "context_length": 204800, + "created_at": "2025-08-26T20:08:47.000Z", + "default_parameters": null, + "default_stops": [], "default_system": null, - "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", + "description": "Free version of MiniMax M2.1 for use in Kilo for Slack only", "endpoint": { - "adapter_name": "InfermaticAdapter", + "adapter_name": "other", "can_abort": true, - "context_length": 32768, + "context_length": 204800, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", - "retainsPrompts": false, - "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", - "training": false - }, - "features": { - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } + "retainsPrompts": true, + "training": true }, + "features": null, "has_chat_completions": true, - "has_completions": true, - "id": "caf29916-6adb-45f8-b895-dd863415d3ed", + "has_completions": false, + "id": "minimax/minimax-m2.1:slackbot", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, - "max_prompt_tokens": null, + "max_completion_tokens": 131072, + "max_prompt_images": null, + "max_prompt_tokens": 204800, "max_tokens_per_image": null, "model": { - "author": "thedrummer", - "context_length": 32768, - "created_at": "2024-09-30T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "Other", + "context_length": 204800, + "created_at": "2025-08-26T20:08:47.000Z", + "default_parameters": null, + "default_stops": [], "default_system": null, - "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", - "features": {}, - "group": "Qwen", + "description": "Free version of MiniMax M2.1 for use in Kilo for Slack only", + "features": null, + "group": "other", "has_text_output": true, - "hf_slug": "TheDrummer/Rocinante-12B-v1.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "TheDrummer: Rocinante 12B", + "name": "MiniMax: MiniMax M2.1 (Free for Kilo for Slack)", "output_modalities": ["text"], - "permaslug": "thedrummer/rocinante-12b", + "permaslug": "minimax/minimax-m2.1:slackbot", "reasoning_config": null, "router": null, - "short_name": "Rocinante 12B", - "slug": "thedrummer/rocinante-12b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax: MiniMax M2.1 (Free for Kilo for Slack)", + "slug": "minimax/minimax-m2.1:slackbot", + "updated_at": "2026-02-10T10:49:03.904Z", "warning_message": null }, - "model_variant_permaslug": "thedrummer/rocinante-12b", - "model_variant_slug": "thedrummer/rocinante-12b", + "model_variant_permaslug": "minimax/minimax-m2.1:slackbot", + "model_variant_slug": "minimax/minimax-m2.1:slackbot", "moderation_required": false, - "name": "Infermatic | thedrummer/rocinante-12b", + "name": "MiniMax: MiniMax M2.1 (Free for Kilo for Slack)", "pricing": { - "completion": "0.0000005", + "completion": "0.0000000", "discount": 0, "image": "0", "image_output": "0", "internal_reasoning": "0", - "prompt": "0.00000025", + "prompt": "0.0000000", "request": "0", "web_search": "0" }, - "provider_display_name": "Infermatic", + "provider_display_name": "Other", "provider_info": { - "adapterName": "InfermaticAdapter", - "baseUrl": "https://api.totalgpt.ai/v1", - "byokEnabled": true, + "adapterName": "other", + "baseUrl": "https://kilo.ai", + "byokEnabled": false, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://infermatic.ai/privacy-policy/", - "retainsPrompts": false, - "termsOfServiceURL": "https://infermatic.ai/terms-and-conditions/", - "training": false + "retainsPrompts": true, + "training": true }, - "displayName": "Infermatic", - "editors": ["{}"], + "displayName": "Other", + "editors": [], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "Unknown", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://infermatic.ai/&size=256" + "className": "rounded-sm", + "url": "https://via.placeholder.com/32x32/000000/FFFFFF?text=S" }, - "ignoredProviderModels": [ - "Qwen2.5-72B-Instruct-Turbo", - "NousResearch-DeepHermes-3-Mistral-24B-Preview", - "deepseek-ai-DeepSeek-R1-Distill-Llama-70B", - "Qwen2-72B-Instruct", - "Meta-Llama-Guard-2-8B", - "TheDrummer-Anubis-70B-v1-FP8-Dynamic", - "anthracite-org-magnum-v2-72b-FP8-Dynamic", - "TheDrummer-Valkyrie-49B-v1", - "TTS-hexgrad-Kokoro-82M", - "Qwen-Qwen3-235B-A22B-Thinking-2507", - "TheDrummer-Anubis-70B-v1.1-FP8-Dynamic", - "nvidia-Llama-3.1-Nemotron-70B-Instruct-HF", - "Strawberrylemonade-L3-70B-v1.1-FP8-Dynamic", - "Mixtral-8x7B-Instruct-v0.1", - "Sao10K-70B-L3.3-Cirrus-x1", - "TheDrummer-Fallen-Llama-3.3-R1-70B-v1", - "Doctor-Shotgun-L3.3-70B-Magnum-v4-SE", - "Qwen-Qwen3-30B-A3B", - "Midnight-Miqu-70B-v1.5", - "Sao10K-72B-Qwen2.5-Kunou-v1-FP8-Dynamic", - "intfloat-multilingual-e5-base", - "Llama-3.2-11B-Vision-Instruct", - "anthracite-org-magnum-v4-72b-FP8-Dynamic", - "Sao10K-L3.3-70B-Euryale-v2.3-FP8-Dynamic" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Infermatic", - "owners": ["{}"], - "slug": "infermatic", + "name": "Other", + "owners": [], + "slug": "other", "statusPageUrl": null }, - "provider_model_id": "TheDrummer-Rocinante-12B-v1.1", - "provider_name": "Infermatic", + "provider_model_id": "minimax/minimax-m2.1:slackbot", + "provider_name": "Other", "provider_region": null, - "provider_slug": "infermatic/bf16", - "quantization": "bf16", + "provider_slug": "other", + "quantization": null, "supported_parameters": [ "max_tokens", "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", - "seed" + "tools", + "reasoning", + "include_reasoning" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "default" }, - "features": {}, - "group": "Qwen", + "features": null, + "group": "other", "has_text_output": true, - "hf_slug": "TheDrummer/Rocinante-12B-v1.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "TheDrummer: Rocinante 12B", + "name": "MiniMax: MiniMax M2.1 (Free for Kilo for Slack)", "output_modalities": ["text"], - "permaslug": "thedrummer/rocinante-12b", + "permaslug": "minimax/minimax-m2.1:slackbot", "reasoning_config": null, "router": null, - "short_name": "Rocinante 12B", - "slug": "thedrummer/rocinante-12b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax: MiniMax M2.1 (Free for Kilo for Slack)", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-02-10T10:49:03.904Z", "warning_message": null - } - ], - "name": "Infermatic", - "slug": "infermatic" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "displayName": "Inflection", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://inflection.ai/&size=256" - }, - "models": [ + }, { - "author": "inflection", - "context_length": 8000, - "created_at": "2024-10-11T00:00:00+00:00", + "author": "minimax", + "context_length": 1000192, + "created_at": "2025-01-15T04:31:02.677929+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\n\nPi has been trained to mirror your tone and style, if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.", + "description": "MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context of up to 4 million tokens.\n\nThe text model adopts a hybrid architecture that combines Lightning Attention, Softmax Attention, and Mixture-of-Experts (MoE). The image model adopts the “ViT-MLP-LLM” framework and is trained on top of the text model.\n\nTo read more about the release, see: https://www.minimaxi.com/en/news/minimax-01-series-2", "endpoint": { - "adapter_name": "InflectionLegacyAdapter", + "adapter_name": "MinimaxAdapter", "can_abort": false, - "context_length": 8000, + "context_length": 1000192, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://inflection.ai/privacy-policy", + "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://developers.inflection.ai/tos", + "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -85395,7 +85530,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "2dbf0c2a-b934-47dd-983d-0ad1d91a4838", + "id": "352546d2-3758-4aa1-9e98-e1a83748aa4e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -85404,85 +85539,81 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 1024, + "max_completion_tokens": 1000192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "inflection", - "context_length": 8000, - "created_at": "2024-10-11T00:00:00+00:00", + "author": "minimax", + "context_length": 1000000, + "created_at": "2025-01-15T04:31:02.677929+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay.\n\nPi has been trained to mirror your tone and style, if you use more emojis, so will Pi! Try experimenting with various prompts and conversation styles.", + "description": "MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context of up to 4 million tokens.\n\nThe text model adopts a hybrid architecture that combines Lightning Attention, Softmax Attention, and Mixture-of-Experts (MoE). The image model adopts the “ViT-MLP-LLM” framework and is trained on top of the text model.\n\nTo read more about the release, see: https://www.minimaxi.com/en/news/minimax-01-series-2", "features": {}, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "MiniMaxAI/MiniMax-Text-01", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Inflection: Inflection 3 Pi", + "name": "MiniMax: MiniMax-01", "output_modalities": ["text"], - "permaslug": "inflection/inflection-3-pi", + "permaslug": "minimax/minimax-01", "reasoning_config": null, "router": null, - "short_name": "Inflection 3 Pi", - "slug": "inflection/inflection-3-pi", + "short_name": "MiniMax-01", + "slug": "minimax/minimax-01", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "inflection/inflection-3-pi", - "model_variant_slug": "inflection/inflection-3-pi", + "model_variant_permaslug": "minimax/minimax-01", + "model_variant_slug": "minimax/minimax-01", "moderation_required": false, - "name": "Inflection | inflection/inflection-3-pi", + "name": "Minimax | minimax/minimax-01", "pricing": { - "completion": "0.00001", + "completion": "0.0000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, - "provider_display_name": "Inflection", + "provider_display_name": "MiniMax", "provider_info": { - "adapterName": "InflectionLegacyAdapter", - "baseUrl": "https://layercake.pubwestus3.inf7ks8.com/external/api/inference", + "adapterName": "MinimaxAdapter", + "baseUrl": "https://api.minimaxi.chat/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://inflection.ai/privacy-policy", + "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://developers.inflection.ai/tos", + "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", "training": false }, - "displayName": "Inflection", + "displayName": "MiniMax", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, + "headquarters": "SG", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://inflection.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" }, "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": false, + "isMultipartSupported": true, "moderationRequired": false, - "name": "Inflection", + "name": "Minimax", "owners": ["{}"], - "slug": "inflection", + "slug": "minimax", "statusPageUrl": null }, - "provider_model_id": "inflection_3_pi", - "provider_name": "Inflection", + "provider_model_id": "MiniMax-Text-01", + "provider_name": "Minimax", "provider_region": null, - "provider_slug": "inflection", + "provider_slug": "minimax", "quantization": "unknown", - "supported_parameters": ["max_tokens", "temperature", "top_p", "stop"], - "supports_multipart": false, + "supported_parameters": ["max_tokens", "temperature", "top_p"], + "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], @@ -85491,39 +85622,57 @@ "features": {}, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "MiniMaxAI/MiniMax-Text-01", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Inflection: Inflection 3 Pi", + "name": "MiniMax: MiniMax-01", "output_modalities": ["text"], - "permaslug": "inflection/inflection-3-pi", + "permaslug": "minimax/minimax-01", "reasoning_config": null, "router": null, - "short_name": "Inflection 3 Pi", - "slug": "inflection/inflection-3-pi", + "short_name": "MiniMax-01", + "slug": "minimax/minimax-01", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, + } + ], + "name": "Minimax", + "slug": "minimax" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "displayName": "Mistral", + "headquarters": "FR", + "icon": { + "url": "/images/icons/Mistral.png" + }, + "models": [ { - "author": "inflection", - "context_length": 8000, - "created_at": "2024-10-11T00:00:00+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 128000, + "created_at": "2024-02-26T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news.\n\nFor emotional intelligence similar to Pi, see [Inflect 3 Pi](/inflection/inflection-3-pi)\n\nSee [Inflection's announcement](https://inflection.ai/blog/enterprise) for more details.", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.", "endpoint": { - "adapter_name": "InflectionLegacyAdapter", + "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 8000, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://inflection.ai/privacy-policy", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://developers.inflection.ai/tos", + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { @@ -85536,7 +85685,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "32d4977a-e055-4d2b-a351-5fc09039e363", + "id": "f1a57233-f872-4fa0-ad37-66c9a6b00469", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -85545,149 +85694,173 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 1024, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "inflection", - "context_length": 8000, - "created_at": "2024-10-11T00:00:00+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 128000, + "created_at": "2024-02-26T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news.\n\nFor emotional intelligence similar to Pi, see [Inflect 3 Pi](/inflection/inflection-3-pi)\n\nSee [Inflection's announcement](https://inflection.ai/blog/enterprise) for more details.", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.", "features": {}, - "group": "Other", + "group": "Mistral", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Inflection: Inflection 3 Productivity", + "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", + "name": "Mistral Large", "output_modalities": ["text"], - "permaslug": "inflection/inflection-3-productivity", + "permaslug": "mistralai/mistral-large", "reasoning_config": null, "router": null, - "short_name": "Inflection 3 Productivity", - "slug": "inflection/inflection-3-productivity", + "short_name": "Mistral Large", + "slug": "mistralai/mistral-large", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "inflection/inflection-3-productivity", - "model_variant_slug": "inflection/inflection-3-productivity", + "model_variant_permaslug": "mistralai/mistral-large", + "model_variant_slug": "mistralai/mistral-large", "moderation_required": false, - "name": "Inflection | inflection/inflection-3-productivity", + "name": "Mistral | mistralai/mistral-large", "pricing": { - "completion": "0.00001", + "completion": "0.000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0", - "web_search": "0" + "prompt": "0.000002" }, - "provider_display_name": "Inflection", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "InflectionLegacyAdapter", - "baseUrl": "https://layercake.pubwestus3.inf7ks8.com/external/api/inference", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://inflection.ai/privacy-policy", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://developers.inflection.ai/tos", + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Inflection", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://inflection.ai/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, - "moderationRequired": false, - "name": "Inflection", - "owners": ["{}"], - "slug": "inflection", - "statusPageUrl": null - }, - "provider_model_id": "inflection_3_productivity", - "provider_name": "Inflection", - "provider_region": null, - "provider_slug": "inflection", - "quantization": "unknown", - "supported_parameters": ["max_tokens", "temperature", "top_p", "stop"], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Mistral", + "owners": ["{}"], + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" + }, + "provider_model_id": "mistral-large-2407", + "provider_name": "Mistral", + "provider_region": null, + "provider_slug": "mistral", + "quantization": "unknown", + "supported_parameters": [ + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "seed", + "response_format", + "structured_outputs", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Other", + "group": "Mistral", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Inflection: Inflection 3 Productivity", + "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", + "name": "Mistral Large", "output_modalities": ["text"], - "permaslug": "inflection/inflection-3-productivity", + "permaslug": "mistralai/mistral-large", "reasoning_config": null, "router": null, - "short_name": "Inflection 3 Productivity", - "slug": "inflection/inflection-3-productivity", + "short_name": "Mistral Large", + "slug": "mistralai/mistral-large", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Inflection", - "slug": "inflection" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "displayName": "Liquid", - "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.liquid.ai/&size=256" - }, - "models": [ + }, { - "author": "liquid", - "context_length": 32768, - "created_at": "2025-10-20T14:34:49.795855+00:00", + "author": "mistralai", + "context_length": 131072, + "created_at": "2024-11-19T01:06:55.27469+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment. It sets a new standard in terms of quality, speed, and memory efficiency.", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": true, - "context_length": 32768, + "adapter_name": "MistralAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -85696,8 +85869,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "58fe225e-1cae-4b2c-b007-a39cc81be4ba", + "has_completions": false, + "id": "4a128170-b056-42d7-8462-a5cea647f9ad", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -85710,93 +85883,107 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "liquid", - "context_length": 8192, - "created_at": "2025-10-20T14:34:49.795855+00:00", + "author": "mistralai", + "context_length": 128000, + "created_at": "2024-11-19T01:06:55.27469+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "LFM2 is a new generation of hybrid models developed by Liquid AI, specifically designed for edge AI and on-device deployment. It sets a new standard in terms of quality, speed, and memory efficiency.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "LiquidAI/LFM2-2.6B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "LiquidAI/LFM2-2.6B", + "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", + "name": "Mistral Large 2407", "output_modalities": ["text"], - "permaslug": "liquid/lfm-2.2-6b", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mistral-large-2407", + "reasoning_config": null, "router": null, - "short_name": "LiquidAI/LFM2-2.6B", - "slug": "liquid/lfm-2.2-6b", + "short_name": "Mistral Large 2407", + "slug": "mistralai/mistral-large-2407", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "liquid/lfm-2.2-6b", - "model_variant_slug": "liquid/lfm-2.2-6b", + "model_variant_permaslug": "mistralai/mistral-large-2407", + "model_variant_slug": "mistralai/mistral-large-2407", "moderation_required": false, - "name": "Liquid | liquid/lfm-2.2-6b", + "name": "Mistral | mistralai/mistral-large-2407", "pricing": { - "completion": "0.00000002", + "completion": "0.000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001", - "request": "0", - "web_search": "0" + "prompt": "0.000002" }, - "provider_display_name": "Liquid", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://router.liquid.ai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Liquid", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.liquid.ai/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["lfm-40b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Liquid", + "name": "Mistral", "owners": ["{}"], - "slug": "liquid", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "lfm2-2.6b", - "provider_name": "Liquid", + "provider_model_id": "mistral-large-2407", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "liquid", + "provider_slug": "mistral", "quantization": "unknown", "supported_parameters": [ "max_tokens", @@ -85806,65 +85993,59 @@ "frequency_penalty", "presence_penalty", "seed", - "top_k", - "min_p", - "repetition_penalty" + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "LiquidAI/LFM2-2.6B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "LiquidAI/LFM2-2.6B", + "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", + "name": "Mistral Large 2407", "output_modalities": ["text"], - "permaslug": "liquid/lfm-2.2-6b", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mistral-large-2407", + "reasoning_config": null, "router": null, - "short_name": "LiquidAI/LFM2-2.6B", - "slug": "liquid/lfm-2.2-6b", + "short_name": "Mistral Large 2407", + "slug": "mistralai/mistral-large-2407", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "liquid", - "context_length": 32768, - "created_at": "2025-10-20T14:36:24.431587+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 131072, + "created_at": "2024-11-19T01:11:25.108028+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "Model created via inbox interface", + "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": true, - "context_length": 32768, + "adapter_name": "MistralAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -85873,8 +86054,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "4cb2b30a-145b-4916-bbd0-29e6568e1749", + "has_completions": false, + "id": "26f5ecd0-44cb-43e8-8cfc-7b155c2e8c05", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -85887,79 +86068,107 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "liquid", - "context_length": 8192, - "created_at": "2025-10-20T14:36:24.431587+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 128000, + "created_at": "2024-11-19T01:11:25.108028+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "Model created via inbox interface", + "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.", "features": {}, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": "LiquidAI/LFM2-8B-A1B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "LiquidAI/LFM2-8B-A1B", + "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", + "name": "Mistral Large 2411", "output_modalities": ["text"], - "permaslug": "liquid/lfm2-8b-a1b", + "permaslug": "mistralai/mistral-large-2411", "reasoning_config": null, "router": null, - "short_name": "LiquidAI/LFM2-8B-A1B", - "slug": "liquid/lfm2-8b-a1b", + "short_name": "Mistral Large 2411", + "slug": "mistralai/mistral-large-2411", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "liquid/lfm2-8b-a1b", - "model_variant_slug": "liquid/lfm2-8b-a1b", + "model_variant_permaslug": "mistralai/mistral-large-2411", + "model_variant_slug": "mistralai/mistral-large-2411", "moderation_required": false, - "name": "Liquid | liquid/lfm2-8b-a1b", + "name": "Mistral | mistralai/mistral-large-2411", "pricing": { - "completion": "0.00000002", + "completion": "0.000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001", - "request": "0", - "web_search": "0" + "prompt": "0.000002" }, - "provider_display_name": "Liquid", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://router.liquid.ai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.liquid.ai/privacy-policy", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://www.liquid.ai/terms-conditions", + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Liquid", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.liquid.ai/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["lfm-40b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Liquid", + "name": "Mistral", "owners": ["{}"], - "slug": "liquid", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "lfm2-8b-a1b", - "provider_name": "Liquid", + "provider_model_id": "mistral-large-2411", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "liquid", + "provider_slug": "mistral", "quantization": "unknown", "supported_parameters": [ "max_tokens", @@ -85969,74 +86178,59 @@ "frequency_penalty", "presence_penalty", "seed", - "top_k", - "min_p", - "repetition_penalty" + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": "LiquidAI/LFM2-8B-A1B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "LiquidAI/LFM2-8B-A1B", + "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", + "name": "Mistral Large 2411", "output_modalities": ["text"], - "permaslug": "liquid/lfm2-8b-a1b", + "permaslug": "mistralai/mistral-large-2411", "reasoning_config": null, "router": null, - "short_name": "LiquidAI/LFM2-8B-A1B", - "slug": "liquid/lfm2-8b-a1b", + "short_name": "Mistral Large 2411", + "slug": "mistralai/mistral-large-2411", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Liquid", - "slug": "liquid" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Mancer", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" - }, - "models": [ + }, { - "author": "alpindale", - "context_length": 6144, - "created_at": "2023-11-10T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["USER:", ""], + "author": "mistralai", + "context_length": 32768, + "created_at": "2024-01-10T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], "default_system": null, - "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge", + "description": "Note: This model is being deprecated. Recommended replacement is the newer [Ministral 8B](/mistral/ministral-8b)\n\nThis model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.", "endpoint": { - "adapter_name": "MancerAdapter", - "can_abort": true, - "context_length": 6144, + "adapter_name": "MistralAdapter", + "can_abort": false, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -86045,8 +86239,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "53185ad0-a0dc-4fad-82b2-bffade322302", + "has_completions": false, + "id": "f543d3e4-4a9b-43f3-9988-c19abd5246c0", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -86055,149 +86249,177 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 1024, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "alpindale", - "context_length": 6144, - "created_at": "2023-11-10T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["USER:", ""], + "author": "mistralai", + "context_length": 32000, + "created_at": "2024-01-10T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], "default_system": null, - "description": "A large LLM created by combining two fine-tuned Llama 70B models into one 120B model. Combines Xwin and Euryale.\n\nCredits to\n- [@chargoddard](https://huggingface.co/chargoddard) for developing the framework used to merge the model - [mergekit](https://github.com/cg123/mergekit).\n- [@Undi95](https://huggingface.co/Undi95) for helping with the merge ratios.\n\n#merge", + "description": "Note: This model is being deprecated. Recommended replacement is the newer [Ministral 8B](/mistral/ministral-8b)\n\nThis model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.", "features": {}, - "group": "Llama2", + "group": "Mistral", "has_text_output": true, - "hf_slug": "alpindale/goliath-120b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "airoboros", + "instruct_type": null, "model_version_group_id": null, - "name": "Goliath 120B", + "name": "Mistral Tiny", "output_modalities": ["text"], - "permaslug": "alpindale/goliath-120b", + "permaslug": "mistralai/mistral-tiny", "reasoning_config": null, "router": null, - "short_name": "Goliath 120B", - "slug": "alpindale/goliath-120b", + "short_name": "Mistral Tiny", + "slug": "mistralai/mistral-tiny", "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null + "warning_message": "This model is deprecated and slated for retirement. Please see [Ministral 8B](/mistral/ministral-8b) for the Mistral suggested upgrade." }, - "model_variant_permaslug": "alpindale/goliath-120b", - "model_variant_slug": "alpindale/goliath-120b", + "model_variant_permaslug": "mistralai/mistral-tiny", + "model_variant_slug": "mistralai/mistral-tiny", "moderation_required": false, - "name": "Mancer 2 | alpindale/goliath-120b", + "name": "Mistral | mistralai/mistral-tiny", "pricing": { - "completion": "0.000008", + "completion": "0.00000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, - "provider_display_name": "Mancer", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MancerAdapter", - "baseUrl": "https://neuro.mancer.tech/oai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Mancer", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mancer 2", + "name": "Mistral", "owners": ["{}"], - "slug": "mancer", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "goliath-120b", - "provider_name": "Mancer 2", + "provider_model_id": "open-mistral-7b", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "mancer/int4", - "quantization": "int4", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", "seed", - "top_a", - "logprobs", - "top_logprobs" + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama2", + "group": "Mistral", "has_text_output": true, - "hf_slug": "alpindale/goliath-120b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "airoboros", + "instruct_type": null, "model_version_group_id": null, - "name": "Goliath 120B", + "name": "Mistral Tiny", "output_modalities": ["text"], - "permaslug": "alpindale/goliath-120b", + "permaslug": "mistralai/mistral-tiny", "reasoning_config": null, "router": null, - "short_name": "Goliath 120B", - "slug": "alpindale/goliath-120b", + "short_name": "Mistral Tiny", + "slug": "mistralai/mistral-tiny", "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null + "warning_message": "This model is deprecated and slated for retirement. Please see [Ministral 8B](/mistral/ministral-8b) for the Mistral suggested upgrade." }, { - "author": "anthracite-org", - "context_length": 16384, - "created_at": "2024-10-22T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "mistralai", + "context_length": 256000, + "created_at": "2025-08-01T20:20:30.639517+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], "default_system": null, - "description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and Opus(https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).", + "description": "Mistral's cutting-edge language model for coding released end of July 2025. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)", "endpoint": { - "adapter_name": "MancerAdapter", - "can_abort": true, - "context_length": 16384, + "adapter_name": "MistralAdapter", + "can_abort": false, + "context_length": 256000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { "supported_parameters": { "response_format": true, - "structured_outputs": false + "structured_outputs": true }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -86206,8 +86428,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "7f8d0c76-0eab-4606-9f4c-d0b4414e2cf1", + "has_completions": false, + "id": "dfdc2841-297c-4de9-96fe-eab0ccfa25a6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -86216,84 +86438,124 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "anthracite-org", - "context_length": 32768, - "created_at": "2024-10-22T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "mistralai", + "context_length": 256000, + "created_at": "2025-08-01T20:20:30.639517+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], "default_system": null, - "description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and Opus(https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).", - "features": {}, - "group": "Qwen", + "description": "Mistral's cutting-edge language model for coding released end of July 2025. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "anthracite-org/magnum-v4-72b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Magnum v4 72B", + "name": "Mistral: Codestral 2508", "output_modalities": ["text"], - "permaslug": "anthracite-org/magnum-v4-72b", - "reasoning_config": null, + "permaslug": "mistralai/codestral-2508", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Magnum v4 72B", - "slug": "anthracite-org/magnum-v4-72b", + "short_name": "Codestral 2508", + "slug": "mistralai/codestral-2508", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "anthracite-org/magnum-v4-72b", - "model_variant_slug": "anthracite-org/magnum-v4-72b", + "model_variant_permaslug": "mistralai/codestral-2508", + "model_variant_slug": "mistralai/codestral-2508", "moderation_required": false, - "name": "Mancer 2 | anthracite-org/magnum-v4-72b", + "name": "Mistral | mistralai/codestral-2508", "pricing": { - "completion": "0.000005", + "completion": "0.0000009", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, - "provider_display_name": "Mancer", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MancerAdapter", - "baseUrl": "https://neuro.mancer.tech/oai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Mancer", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mancer 2", + "name": "Mistral", "owners": ["{}"], - "slug": "mancer", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "magnum-72b-v4", - "provider_name": "Mancer 2", + "provider_model_id": "codestral-2508", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "mancer/fp8", - "quantization": "fp8", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ + "structured_outputs", "response_format", "max_tokens", "temperature", @@ -86301,64 +86563,70 @@ "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", "seed", - "top_a", - "logprobs", - "top_logprobs" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Qwen", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "anthracite-org/magnum-v4-72b", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Magnum v4 72B", + "name": "Mistral: Codestral 2508", "output_modalities": ["text"], - "permaslug": "anthracite-org/magnum-v4-72b", - "reasoning_config": null, + "permaslug": "mistralai/codestral-2508", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Magnum v4 72B", - "slug": "anthracite-org/magnum-v4-72b", + "short_name": "Codestral 2508", + "slug": "mistralai/codestral-2508", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mancer", - "context_length": 8000, - "created_at": "2023-08-02T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "mistralai", + "context_length": 8192, + "created_at": "2025-10-30T22:47:40.42301+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.", + "description": "Mistral Codestral Embed is specially designed for code, perfect for embedding code databases, repositories, and powering coding assistants with state-of-the-art retrieval.", "endpoint": { - "adapter_name": "MancerAdapter", - "can_abort": true, - "context_length": 8000, + "adapter_name": "MistralAdapter", + "can_abort": false, + "context_length": 8192, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -86367,8 +86635,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "c8fb1a1b-9f2c-4dc6-b2be-43b25f999502", + "has_completions": false, + "id": "a93321a4-2cbf-4bd0-a0cf-09cb593fb821", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -86377,99 +86645,135 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mancer", - "context_length": 8000, - "created_at": "2023-08-02T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "mistralai", + "context_length": 8192, + "created_at": "2025-10-30T22:47:40.42301+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.", - "features": {}, - "group": "Llama2", - "has_text_output": true, + "description": "Mistral Codestral Embed is specially designed for code, perfect for embedding code databases, repositories, and powering coding assistants with state-of-the-art retrieval.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "Mancer: Weaver (alpha)", - "output_modalities": ["text"], - "permaslug": "mancer/weaver", - "reasoning_config": null, + "name": "Mistral: Codestral Embed 2505", + "output_modalities": ["embeddings"], + "permaslug": "mistralai/codestral-embed-2505", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Weaver (alpha)", - "slug": "mancer/weaver", + "short_name": "Codestral Embed 2505", + "slug": "mistralai/codestral-embed-2505", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mancer/weaver", - "model_variant_slug": "mancer/weaver", + "model_variant_permaslug": "mistralai/codestral-embed-2505", + "model_variant_slug": "mistralai/codestral-embed-2505", "moderation_required": false, - "name": "Mancer 2 | mancer/weaver", + "name": "Mistral | mistralai/codestral-embed-2505", "pricing": { - "completion": "0.000001", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000075", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, - "provider_display_name": "Mancer", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MancerAdapter", - "baseUrl": "https://neuro.mancer.tech/oai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Mancer", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mancer 2", + "name": "Mistral", "owners": ["{}"], - "slug": "mancer", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "weaver-alpha", - "provider_name": "Mancer 2", + "provider_model_id": "codestral-embed-2505", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "mancer/fp8", - "quantization": "fp8", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", "seed", - "top_a", - "logprobs", - "top_logprobs" + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, @@ -86477,42 +86781,57 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama2", - "has_text_output": true, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "Mancer: Weaver (alpha)", - "output_modalities": ["text"], - "permaslug": "mancer/weaver", - "reasoning_config": null, + "name": "Mistral: Codestral Embed 2505", + "output_modalities": ["embeddings"], + "permaslug": "mistralai/codestral-embed-2505", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Weaver (alpha)", - "slug": "mancer/weaver", + "short_name": "Codestral Embed 2505", + "slug": "mistralai/codestral-embed-2505", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "gryphe", - "context_length": 8192, - "created_at": "2023-07-02T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], - "default_system": null, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", - "endpoint": { - "adapter_name": "MancerAdapter", - "can_abort": true, - "context_length": 8192, + "author": "mistralai", + "context_length": 262144, + "created_at": "2025-12-09T13:03:39+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.3, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "Devstral 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 123B-parameter dense transformer model supporting a 256K context window.\n\nDevstral 2 supports exploring codebases and orchestrating changes across multiple files while maintaining architecture-level context. It tracks framework dependencies, detects failures, and retries with corrections—solving challenges like bug fixing and modernizing legacy systems. The model can be fine-tuned to prioritize specific languages or optimize for large enterprise codebases. It is available under a modified MIT license.", + "endpoint": { + "adapter_name": "MistralAdapter", + "can_abort": false, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { @@ -86524,8 +86843,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "2ffda66b-b09e-405f-9a32-fe06a4097943", + "has_completions": false, + "id": "8551fff5-940d-4c61-95a7-243791f03dea", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -86534,83 +86853,125 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "gryphe", - "context_length": 4096, - "created_at": "2023-07-02T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "mistralai", + "context_length": 262144, + "created_at": "2025-12-09T13:03:39+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.3, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", - "features": {}, - "group": "Llama2", + "description": "Devstral 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 123B-parameter dense transformer model supporting a 256K context window.\n\nDevstral 2 supports exploring codebases and orchestrating changes across multiple files while maintaining architecture-level context. It tracks framework dependencies, detects failures, and retries with corrections—solving challenges like bug fixing and modernizing legacy systems. The model can be fine-tuned to prioritize specific languages or optimize for large enterprise codebases. It is available under a modified MIT license.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "Gryphe/MythoMax-L2-13b", + "hf_slug": "mistralai/Devstral-2-123B-Instruct-2512", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "MythoMax 13B", + "name": "Mistral: Devstral 2 2512", "output_modalities": ["text"], - "permaslug": "gryphe/mythomax-l2-13b", - "reasoning_config": null, + "permaslug": "mistralai/devstral-2512", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "MythoMax 13B", - "slug": "gryphe/mythomax-l2-13b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Devstral 2 2512", + "slug": "mistralai/devstral-2512", + "updated_at": "2026-01-26T16:44:50.583818+00:00", "warning_message": null }, - "model_variant_permaslug": "gryphe/mythomax-l2-13b", - "model_variant_slug": "gryphe/mythomax-l2-13b", + "model_variant_permaslug": "mistralai/devstral-2512", + "model_variant_slug": "mistralai/devstral-2512", "moderation_required": false, - "name": "Mancer 2 | gryphe/mythomax-l2-13b", + "name": "Mistral | mistralai/devstral-2512", "pricing": { - "completion": "0.00000075", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, - "provider_display_name": "Mancer", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MancerAdapter", - "baseUrl": "https://neuro.mancer.tech/oai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Mancer", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mancer 2", + "name": "Mistral", "owners": ["{}"], - "slug": "mancer", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "mythomax", - "provider_name": "Mancer 2", + "provider_model_id": "devstral-2512", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "mancer/fp8", - "quantization": "fp8", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -86618,65 +86979,71 @@ "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", "seed", - "top_a", "response_format", - "logprobs", - "top_logprobs" + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama2", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "Gryphe/MythoMax-L2-13b", + "hf_slug": "mistralai/Devstral-2-123B-Instruct-2512", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "MythoMax 13B", + "name": "Mistral: Devstral 2 2512", "output_modalities": ["text"], - "permaslug": "gryphe/mythomax-l2-13b", - "reasoning_config": null, + "permaslug": "mistralai/devstral-2512", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "MythoMax 13B", - "slug": "gryphe/mythomax-l2-13b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Devstral 2 2512", + "slug": "mistralai/devstral-2512", + "updated_at": "2026-01-26T16:44:50.583818+00:00", "warning_message": null }, { - "author": "neversleep", - "context_length": 8192, - "created_at": "2023-11-26T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "mistralai", + "context_length": 131072, + "created_at": "2025-07-10T15:28:41.981407+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], "default_system": null, - "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored", + "description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI. Positioned as a step up from Devstral Small, it achieves 61.6% on SWE-Bench Verified, placing it ahead of Gemini 2.5 Pro and GPT-4.1 in code-related tasks, at a fraction of the cost. It is designed for generalization across prompt styles and tool use in code agents and frameworks.\n\nDevstral Medium is available via API only (not open-weight), and supports enterprise deployment on private infrastructure, with optional fine-tuning capabilities.", "endpoint": { - "adapter_name": "MancerAdapter", - "can_abort": true, - "context_length": 8192, + "adapter_name": "MistralAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -86685,8 +87052,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "c8e2d3a6-3c82-4c21-a951-b7a87720b298", + "has_completions": false, + "id": "8f47daf9-62e7-423d-96de-fcb241b39175", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -86695,149 +87062,193 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "neversleep", - "context_length": 8192, - "created_at": "2023-11-26T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "mistralai", + "context_length": 131072, + "created_at": "2025-07-10T15:28:41.981407+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], "default_system": null, - "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored", - "features": {}, - "group": "Llama2", + "description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI. Positioned as a step up from Devstral Small, it achieves 61.6% on SWE-Bench Verified, placing it ahead of Gemini 2.5 Pro and GPT-4.1 in code-related tasks, at a fraction of the cost. It is designed for generalization across prompt styles and tool use in code agents and frameworks.\n\nDevstral Medium is available via API only (not open-weight), and supports enterprise deployment on private infrastructure, with optional fine-tuning capabilities.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "NeverSleep/Noromaid-20b-v0.1.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "Noromaid 20B", + "name": "Mistral: Devstral Medium", "output_modalities": ["text"], - "permaslug": "neversleep/noromaid-20b", - "reasoning_config": null, + "permaslug": "mistralai/devstral-medium-2507", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Noromaid 20B", - "slug": "neversleep/noromaid-20b", + "short_name": "Devstral Medium", + "slug": "mistralai/devstral-medium", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "neversleep/noromaid-20b", - "model_variant_slug": "neversleep/noromaid-20b", + "model_variant_permaslug": "mistralai/devstral-medium-2507", + "model_variant_slug": "mistralai/devstral-medium", "moderation_required": false, - "name": "Mancer 2 | neversleep/noromaid-20b", + "name": "Mistral | mistralai/devstral-medium-2507", "pricing": { "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, - "provider_display_name": "Mancer", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MancerAdapter", - "baseUrl": "https://neuro.mancer.tech/oai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Mancer", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mancer 2", + "name": "Mistral", "owners": ["{}"], - "slug": "mancer", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "noromaid", - "provider_name": "Mancer 2", + "provider_model_id": "devstral-medium-2507", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "mancer/fp8", - "quantization": "fp8", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", "seed", - "top_a", - "logprobs", - "top_logprobs" + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama2", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "NeverSleep/Noromaid-20b-v0.1.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "Noromaid 20B", + "name": "Mistral: Devstral Medium", "output_modalities": ["text"], - "permaslug": "neversleep/noromaid-20b", - "reasoning_config": null, + "permaslug": "mistralai/devstral-medium-2507", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Noromaid 20B", - "slug": "neversleep/noromaid-20b", + "short_name": "Devstral Medium", + "slug": "mistralai/devstral-medium", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "undi95", - "context_length": 6144, - "created_at": "2023-07-22T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "mistralai", + "context_length": 131072, + "created_at": "2025-07-10T15:19:11.726916+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], "default_system": null, - "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge", + "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and released under the Apache 2.0 license, it features a 128k token context window and supports both Mistral-style function calling and XML output formats.\n\nDesigned for agentic coding workflows, Devstral Small 1.1 is optimized for tasks such as codebase exploration, multi-file edits, and integration into autonomous development agents like OpenHands and Cline. It achieves 53.6% on SWE-Bench Verified, surpassing all other open models on this benchmark, while remaining lightweight enough to run on a single 4090 GPU or Apple silicon machine. The model uses a Tekken tokenizer with a 131k vocabulary and is deployable via vLLM, Transformers, Ollama, LM Studio, and other OpenAI-compatible runtimes.\n", "endpoint": { - "adapter_name": "MancerAdapter", - "can_abort": true, - "context_length": 6144, + "adapter_name": "MistralAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -86846,8 +87257,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "75f85fa5-1ee8-474a-b0ae-24ccc2bbb113", + "has_completions": false, + "id": "768e136a-0758-4c83-b1a7-701cb57da9e7", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -86856,153 +87267,195 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "undi95", - "context_length": 4096, - "created_at": "2023-07-22T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "mistralai", + "context_length": 131072, + "created_at": "2025-07-10T15:19:11.726916+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], "default_system": null, - "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge", - "features": {}, - "group": "Llama2", + "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and released under the Apache 2.0 license, it features a 128k token context window and supports both Mistral-style function calling and XML output formats.\n\nDesigned for agentic coding workflows, Devstral Small 1.1 is optimized for tasks such as codebase exploration, multi-file edits, and integration into autonomous development agents like OpenHands and Cline. It achieves 53.6% on SWE-Bench Verified, surpassing all other open models on this benchmark, while remaining lightweight enough to run on a single 4090 GPU or Apple silicon machine. The model uses a Tekken tokenizer with a 131k vocabulary and is deployable via vLLM, Transformers, Ollama, LM Studio, and other OpenAI-compatible runtimes.\n", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "Undi95/ReMM-SLERP-L2-13B", + "hf_slug": "mistralai/Devstral-Small-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "ReMM SLERP 13B", + "name": "Mistral: Devstral Small 1.1", "output_modalities": ["text"], - "permaslug": "undi95/remm-slerp-l2-13b", - "reasoning_config": null, + "permaslug": "mistralai/devstral-small-2507", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "ReMM SLERP 13B", - "slug": "undi95/remm-slerp-l2-13b", + "short_name": "Devstral Small 1.1", + "slug": "mistralai/devstral-small", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "undi95/remm-slerp-l2-13b", - "model_variant_slug": "undi95/remm-slerp-l2-13b", + "model_variant_permaslug": "mistralai/devstral-small-2507", + "model_variant_slug": "mistralai/devstral-small", "moderation_required": false, - "name": "Mancer 2 | undi95/remm-slerp-l2-13b", + "name": "Mistral | mistralai/devstral-small-2507", "pricing": { - "completion": "0.00000075", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "Mancer", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MancerAdapter", - "baseUrl": "https://neuro.mancer.tech/oai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Mancer", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mancer 2", + "name": "Mistral", "owners": ["{}"], - "slug": "mancer", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "remm-slerp", - "provider_name": "Mancer 2", + "provider_model_id": "devstral-small-2507", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "mancer/fp8", - "quantization": "fp8", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", "seed", - "top_a", - "logprobs", - "top_logprobs" + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama2", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "Undi95/ReMM-SLERP-L2-13B", + "hf_slug": "mistralai/Devstral-Small-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "ReMM SLERP 13B", + "name": "Mistral: Devstral Small 1.1", "output_modalities": ["text"], - "permaslug": "undi95/remm-slerp-l2-13b", - "reasoning_config": null, + "permaslug": "mistralai/devstral-small-2507", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "ReMM SLERP 13B", - "slug": "undi95/remm-slerp-l2-13b", + "short_name": "Devstral Small 1.1", + "slug": "mistralai/devstral-small", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-09-30T12:32:56.306946+00:00", + "author": "mistralai", + "context_length": 262144, + "created_at": "2025-12-02T13:22:15.851192+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, + "temperature": 0.3, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.", "endpoint": { - "adapter_name": "MancerAdapter", - "can_abort": true, - "context_length": 131072, + "adapter_name": "MistralAdapter", + "can_abort": false, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -87011,8 +87464,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "ed92959c-5309-43f3-b897-8039cb7478af", + "has_completions": false, + "id": "0b53717b-9af5-4de3-80af-ebedd2adf981", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -87021,120 +87474,140 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 200000, - "created_at": "2025-09-30T12:32:56.306946+00:00", + "author": "mistralai", + "context_length": 128000, + "created_at": "2025-12-02T13:22:15.851192+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, + "temperature": 0.3, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": null, + "hf_slug": "mistralai/Ministral-3-14B-Instruct-2512", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Mistral: Ministral 3 14B 2512", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "mistralai/ministral-14b-2512", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "Ministral 3 14B 2512", + "slug": "mistralai/ministral-14b-2512", + "updated_at": "2025-12-10T16:54:21.432818+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6", - "model_variant_slug": "z-ai/glm-4.6", + "model_variant_permaslug": "mistralai/ministral-14b-2512", + "model_variant_slug": "mistralai/ministral-14b-2512", "moderation_required": false, - "name": "Mancer 2 | z-ai/glm-4.6", + "name": "Mistral | mistralai/ministral-14b-2512", "pricing": { - "completion": "0.000002", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000045", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, - "provider_display_name": "Mancer", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MancerAdapter", - "baseUrl": "https://neuro.mancer.tech/oai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Mancer", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mancer 2", + "name": "Mistral", "owners": ["{}"], - "slug": "mancer", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "glm-4.6", - "provider_name": "Mancer 2", + "provider_model_id": "ministral-14b-2512", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "mancer/fp8", - "quantization": "fp8", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", "seed", - "top_a", - "logprobs", - "top_logprobs", - "tools" + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -87142,68 +87615,68 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": null, + "hf_slug": "mistralai/Ministral-3-14B-Instruct-2512", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Mistral: Ministral 3 14B 2512", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "mistralai/ministral-14b-2512", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "Ministral 3 14B 2512", + "slug": "mistralai/ministral-14b-2512", + "updated_at": "2025-12-10T16:54:21.432818+00:00", "warning_message": null }, { - "author": "z-ai", + "author": "mistralai", "context_length": 131072, - "created_at": "2025-12-22T04:33:34.884504+00:00", + "created_at": "2025-12-02T13:19:20.726635+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": 0.3, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "The smallest model in the Ministral 3 family, Ministral 3 3B is a powerful, efficient tiny language model with vision capabilities.", "endpoint": { - "adapter_name": "MancerAdapter", - "can_abort": true, + "adapter_name": "MistralAdapter", + "can_abort": false, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, - "literal_required": false, - "type_function": false + "literal_none": true, + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, - "has_completions": true, - "id": "c05c00f3-1afd-4238-880b-911611079858", + "has_completions": false, + "id": "b9f6828f-aef9-498a-91f5-6816fbf72420", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -87212,120 +87685,140 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 200000, - "created_at": "2025-12-22T04:33:34.884504+00:00", + "author": "mistralai", + "context_length": 128000, + "created_at": "2025-12-02T13:19:20.726635+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": 0.3, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "The smallest model in the Ministral 3 family, Ministral 3 3B is a powerful, efficient tiny language model with vision capabilities.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": "mistralai/Ministral-3-3B-Instruct-2512", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "Mistral: Ministral 3 3B 2512", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", + "permaslug": "mistralai/ministral-3b-2512", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "Ministral 3 3B 2512", + "slug": "mistralai/ministral-3b-2512", + "updated_at": "2025-12-10T16:53:53.577361+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.7-20251222", - "model_variant_slug": "z-ai/glm-4.7", + "model_variant_permaslug": "mistralai/ministral-3b-2512", + "model_variant_slug": "mistralai/ministral-3b-2512", "moderation_required": false, - "name": "Mancer 2 | z-ai/glm-4.7-20251222", + "name": "Mistral | mistralai/ministral-3b-2512", "pricing": { - "completion": "0.000002", + "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000045", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "Mancer", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MancerAdapter", - "baseUrl": "https://neuro.mancer.tech/oai/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mancer.tech/privacy", - "retainsPrompts": false, - "termsOfServiceURL": "https://mancer.tech/terms", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "Mancer", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://mancer.tech/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": ["mytholite", "dans-pe-1.3-12b", "dans-pe-1.3-24b"], - "isAbortable": true, + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mancer 2", + "name": "Mistral", "owners": ["{}"], - "slug": "mancer", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "glm-4.7", - "provider_name": "Mancer 2", + "provider_model_id": "ministral-3b-2512", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "mancer/fp8", - "quantization": "fp8", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "repetition_penalty", - "logit_bias", - "top_k", - "min_p", "seed", - "top_a", "response_format", - "logprobs", - "top_logprobs", + "structured_outputs", + "tool_choice", "tools" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -87333,74 +87826,58 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": "mistralai/Ministral-3-3B-Instruct-2512", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "Mistral: Ministral 3 3B 2512", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", + "permaslug": "mistralai/ministral-3b-2512", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "Ministral 3 3B 2512", + "slug": "mistralai/ministral-3b-2512", + "updated_at": "2025-12-10T16:53:53.577361+00:00", "warning_message": null - } - ], - "name": "Mancer 2", - "slug": "mancer" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "datacenters": ["US"], - "displayName": "MiniMax", - "headquarters": "SG", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" - }, - "models": [ + }, { - "author": "minimax", - "context_length": 1000000, - "created_at": "2025-06-17T22:46:54.257159+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 262144, + "created_at": "2025-12-02T13:20:54.103183+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.3, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", + "description": "A balanced model in the Ministral 3 family, Ministral 3 8B is a powerful, efficient tiny language model with vision capabilities.", "endpoint": { - "adapter_name": "MinimaxAdapter", + "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 1000000, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -87410,7 +87887,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "dd5bc8d5-68a7-48a7-a680-db37579d5c14", + "id": "c85d26e1-1e0c-449b-9775-2afce7ae510b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -87419,353 +87896,197 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 40000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "minimax", - "context_length": 1000000, - "created_at": "2025-06-17T22:46:54.257159+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": null, - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MiniMax: MiniMax M1", - "output_modalities": ["text"], - "permaslug": "minimax/minimax-m1", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, - "router": null, - "short_name": "MiniMax M1", - "slug": "minimax/minimax-m1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "minimax/minimax-m1", - "model_variant_slug": "minimax/minimax-m1", - "moderation_required": false, - "name": "Minimax | minimax/minimax-m1", - "pricing": { - "completion": "0.0000022", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "MiniMax", - "provider_info": { - "adapterName": "MinimaxAdapter", - "baseUrl": "https://api.minimaxi.chat/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", - "training": false - }, - "displayName": "MiniMax", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "SG", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" - }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Minimax", - "owners": ["{}"], - "slug": "minimax", - "statusPageUrl": null - }, - "provider_model_id": "MiniMax-M1", - "provider_name": "Minimax", - "provider_region": null, - "provider_slug": "minimax", - "quantization": "unknown", - "supported_parameters": [ - "reasoning", - "include_reasoning", - "max_tokens", - "temperature", - "top_p" - ], - "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, - "variable_pricings": [ - { - "completions": "0.0000022", - "prompt": "0.0000013", - "threshold": 200000, - "type": "prompt-threshold" - } - ], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": null, - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MiniMax: MiniMax M1", - "output_modalities": ["text"], - "permaslug": "minimax/minimax-m1", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, - "router": null, - "short_name": "MiniMax M1", - "slug": "minimax/minimax-m1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-10-23T20:41:33.120854+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, - "default_stops": [], - "default_system": null, - "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", - "endpoint": { - "adapter_name": "MinimaxReasoningAdapter", - "can_abort": false, - "context_length": 204800, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", - "training": false - }, - "features": { - "is_mandatory_reasoning": true, - "should_send_reasoning_text_in_text_content": false, - "supports_input_audio": false, - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": false, - "id": "46585386-044e-4e91-852c-f56bf056e1af", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": 500, - "limit_rpm_cf": null, - "max_completion_tokens": 131072, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-10-23T20:41:33.120854+00:00", + "author": "mistralai", + "context_length": 128000, + "created_at": "2025-12-02T13:20:54.103183+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": 0.3, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "description": "A balanced model in the Ministral 3 family, Ministral 3 8B is a powerful, efficient tiny language model with vision capabilities.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2", + "hf_slug": "mistralai/Ministral-3-8B-Instruct-2512", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M2", + "name": "Mistral: Ministral 3 8B 2512", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2", + "permaslug": "mistralai/ministral-8b-2512", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "MiniMax M2", - "slug": "minimax/minimax-m2", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Ministral 3 8B 2512", + "slug": "mistralai/ministral-8b-2512", + "updated_at": "2025-12-10T16:54:03.715606+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m2", - "model_variant_slug": "minimax/minimax-m2", + "model_variant_permaslug": "mistralai/ministral-8b-2512", + "model_variant_slug": "mistralai/ministral-8b-2512", "moderation_required": false, - "name": "Minimax | minimax/minimax-m2", + "name": "Mistral | mistralai/ministral-8b-2512", "pricing": { - "completion": "0.00000102", - "discount": 0.15, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000255", - "request": "0", - "web_search": "0" + "completion": "0.00000015", + "discount": 0, + "prompt": "0.00000015" }, - "provider_display_name": "MiniMax", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MinimaxReasoningAdapter", - "baseUrl": "https://api.minimaxi.chat/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "MiniMax", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, - "headquarters": "SG", + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Minimax", + "name": "Mistral", "owners": ["{}"], - "slug": "minimax", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "MiniMax-M2-Stable", - "provider_name": "Minimax", + "provider_model_id": "ministral-8b-2512", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "minimax/fp8", - "quantization": "fp8", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", - "tool_choice", - "tools" + "stop", + "frequency_penalty", + "presence_penalty", + "seed", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2", + "hf_slug": "mistralai/Ministral-3-8B-Instruct-2512", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M2", + "name": "Mistral: Ministral 3 8B 2512", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2", + "permaslug": "mistralai/ministral-8b-2512", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "MiniMax M2", - "slug": "minimax/minimax-m2", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Ministral 3 8B 2512", + "slug": "mistralai/ministral-8b-2512", + "updated_at": "2025-12-10T16:54:03.715606+00:00", "warning_message": null }, { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-12-23T01:56:37+00:00", + "author": "mistralai", + "context_length": 131072, + "created_at": "2024-10-17T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.9 + "temperature": 0.3 }, "default_stops": [], - "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", - "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "default_system": null, + "description": "Ministral 3B is a 3B parameter model optimized for on-device and edge computing. It excels in knowledge, commonsense reasoning, and function-calling, outperforming larger models like Mistral 7B on most benchmarks. Supporting up to 128k context length, it’s ideal for orchestrating agentic workflows and specialist tasks with efficient inference.", "endpoint": { - "adapter_name": "MinimaxReasoningAdapter", + "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 204800, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, "features": { - "is_mandatory_reasoning": true, - "should_send_reasoning_text_in_text_content": false, - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -87775,7 +88096,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "e1e246cc-6062-43ca-96cd-b7e580ea81c7", + "id": "71ce5f13-6ef2-4cbe-a650-66d2f11a4ecb", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -87784,323 +88105,164 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-12-23T01:56:37+00:00", + "author": "mistralai", + "context_length": 128000, + "created_at": "2024-10-17T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.9 + "temperature": 0.3 }, "default_stops": [], - "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", - "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "default_system": null, + "description": "Ministral 3B is a 3B parameter model optimized for on-device and edge computing. It excels in knowledge, commonsense reasoning, and function-calling, outperforming larger models like Mistral 7B on most benchmarks. Supporting up to 128k context length, it’s ideal for orchestrating agentic workflows and specialist tasks with efficient inference.", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M2.1", + "name": "Mistral: Ministral 3B", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2.1", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "mistralai/ministral-3b", + "reasoning_config": null, "router": null, - "short_name": "MiniMax M2.1", - "slug": "minimax/minimax-m2.1", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Ministral 3B", + "slug": "mistralai/ministral-3b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m2.1", - "model_variant_slug": "minimax/minimax-m2.1", + "model_variant_permaslug": "mistralai/ministral-3b", + "model_variant_slug": "mistralai/ministral-3b", "moderation_required": false, - "name": "Minimax | minimax/minimax-m2.1", + "name": "Mistral | mistralai/ministral-3b", "pricing": { - "completion": "0.0000012", + "completion": "0.00000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, - "provider_display_name": "MiniMax", + "provider_display_name": "Mistral", "provider_info": { - "adapterName": "MinimaxReasoningAdapter", - "baseUrl": "https://api.minimaxi.chat/v1", + "adapterName": "MistralAdapter", + "baseUrl": "https://api.mistral.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", + "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", "retainsPrompts": true, - "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", + "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", "training": false }, - "displayName": "MiniMax", + "displayName": "Mistral", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": false, - "headquarters": "SG", + "headquarters": "FR", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" + "url": "/images/icons/Mistral.png" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "mistral-moderation-2411-all", + "voxtral-mini-2507", + "voxtral-small-2507", + "voxtral-mini-transcribe-2507", + "mistral-medium", + "mistral-tiny", + "mistral-tiny-2312", + "open-mistral-nemo", + "mistral-tiny-2407", + "open-mixtral-8x7b", + "mistral-small", + "mistral-small-2312", + "open-mixtral-8x22b-2404", + "mistral-large-pixtral-2411", + "codestral-2412", + "codestral-2411-rc5", + "pixtral-12b", + "mistral-moderation-2411", + "mistral-ocr-2503", + "mistral-ocr-2505", + "mistral-saba-2502", + "open-mixtral-8x22b", + "mistral-large-2407", + "magistral-medium-2507", + "mistral-embed", + "codestral-embed", + "codestral-2501", + "mistral-small-2501", + "mistral-ocr-2512", + "labs-devstral-small-2512" + ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Minimax", + "name": "Mistral", "owners": ["{}"], - "slug": "minimax", - "statusPageUrl": null + "slug": "mistral", + "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "MiniMax-M2.1", - "provider_name": "Minimax", + "provider_model_id": "ministral-3b-2410", + "provider_name": "Mistral", "provider_region": null, - "provider_slug": "minimax/fp8", - "quantization": "fp8", + "provider_slug": "mistral", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "seed", + "response_format", + "structured_outputs", "tools", - "tool_choice", - "response_format" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2.1", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MiniMax: MiniMax M2.1", - "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2.1", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, - "router": null, - "short_name": "MiniMax M2.1", - "slug": "minimax/minimax-m2.1", - "updated_at": "2026-01-08T19:23:52.555156+00:00", - "warning_message": null - }, - { - "author": "minimax", - "context_length": 1000192, - "created_at": "2025-01-15T04:31:02.677929+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context of up to 4 million tokens.\n\nThe text model adopts a hybrid architecture that combines Lightning Attention, Softmax Attention, and Mixture-of-Experts (MoE). The image model adopts the “ViT-MLP-LLM” framework and is trained on top of the text model.\n\nTo read more about the release, see: https://www.minimaxi.com/en/news/minimax-01-series-2", - "endpoint": { - "adapter_name": "MinimaxAdapter", - "can_abort": false, - "context_length": 1000192, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", - "training": false - }, - "features": { - "supported_parameters": {}, - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": false, - "id": "352546d2-3758-4aa1-9e98-e1a83748aa4e", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 1000192, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "minimax", - "context_length": 1000000, - "created_at": "2025-01-15T04:31:02.677929+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context of up to 4 million tokens.\n\nThe text model adopts a hybrid architecture that combines Lightning Attention, Softmax Attention, and Mixture-of-Experts (MoE). The image model adopts the “ViT-MLP-LLM” framework and is trained on top of the text model.\n\nTo read more about the release, see: https://www.minimaxi.com/en/news/minimax-01-series-2", - "features": {}, - "group": "Other", - "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-Text-01", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MiniMax: MiniMax-01", - "output_modalities": ["text"], - "permaslug": "minimax/minimax-01", - "reasoning_config": null, - "router": null, - "short_name": "MiniMax-01", - "slug": "minimax/minimax-01", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "minimax/minimax-01", - "model_variant_slug": "minimax/minimax-01", - "moderation_required": false, - "name": "Minimax | minimax/minimax-01", - "pricing": { - "completion": "0.0000011", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "MiniMax", - "provider_info": { - "adapterName": "MinimaxAdapter", - "baseUrl": "https://api.minimaxi.chat/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://www.minimax.io/platform/protocol/privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://www.minimax.io/platform/protocol/terms-of-service", - "training": false - }, - "displayName": "MiniMax", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "SG", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://minimaxi.com/&size=256" - }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Minimax", - "owners": ["{}"], - "slug": "minimax", - "statusPageUrl": null - }, - "provider_model_id": "MiniMax-Text-01", - "provider_name": "Minimax", - "provider_region": null, - "provider_slug": "minimax", - "quantization": "unknown", - "supported_parameters": ["max_tokens", "temperature", "top_p"], - "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-Text-01", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax-01", + "name": "Mistral: Ministral 3B", "output_modalities": ["text"], - "permaslug": "minimax/minimax-01", + "permaslug": "mistralai/ministral-3b", "reasoning_config": null, "router": null, - "short_name": "MiniMax-01", - "slug": "minimax/minimax-01", + "short_name": "Ministral 3B", + "slug": "mistralai/ministral-3b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Minimax", - "slug": "minimax" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "displayName": "Mistral", - "headquarters": "FR", - "icon": { - "url": "/images/icons/Mistral.png" - }, - "models": [ + }, { "author": "mistralai", - "context_length": 128000, - "created_at": "2024-02-26T00:00:00+00:00", + "context_length": 131072, + "created_at": "2024-10-17T00:00:00+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.", + "description": "Ministral 8B is an 8B parameter model featuring a unique interleaved sliding-window attention pattern for faster, memory-efficient inference. Designed for edge use cases, it supports up to 128k context length and excels in knowledge and reasoning tasks. It outperforms peers in the sub-10B category, making it perfect for low-latency, privacy-first applications.", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 128000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -88118,7 +88280,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "f1a57233-f872-4fa0-ad37-66c9a6b00469", + "id": "c70696c7-73c0-47c3-96b4-20c44b17101b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -88133,13 +88295,13 @@ "model": { "author": "mistralai", "context_length": 128000, - "created_at": "2024-02-26T00:00:00+00:00", + "created_at": "2024-10-17T00:00:00+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.", + "description": "Ministral 8B is an 8B parameter model featuring a unique interleaved sliding-window attention pattern for faster, memory-efficient inference. Designed for edge use cases, it supports up to 128k context length and excels in knowledge and reasoning tasks. It outperforms peers in the sub-10B category, making it perfect for low-latency, privacy-first applications.", "features": {}, "group": "Mistral", "has_text_output": true, @@ -88148,25 +88310,25 @@ "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", - "name": "Mistral Large", + "model_version_group_id": null, + "name": "Mistral: Ministral 8B", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-large", + "permaslug": "mistralai/ministral-8b", "reasoning_config": null, "router": null, - "short_name": "Mistral Large", - "slug": "mistralai/mistral-large", + "short_name": "Ministral 8B", + "slug": "mistralai/ministral-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-large", - "model_variant_slug": "mistralai/mistral-large", + "model_variant_permaslug": "mistralai/ministral-8b", + "model_variant_slug": "mistralai/ministral-8b", "moderation_required": false, - "name": "Mistral | mistralai/mistral-large", + "name": "Mistral | mistralai/ministral-8b", "pricing": { - "completion": "0.000006", + "completion": "0.0000001", "discount": 0, - "prompt": "0.000002" + "prompt": "0.0000001" }, "provider_display_name": "Mistral", "provider_info": { @@ -88217,7 +88379,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -88227,7 +88390,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "mistral-large-2407", + "provider_model_id": "ministral-8b-2410", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -88259,31 +88422,33 @@ "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", - "name": "Mistral Large", + "model_version_group_id": null, + "name": "Mistral: Ministral 8B", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-large", + "permaslug": "mistralai/ministral-8b", "reasoning_config": null, "router": null, - "short_name": "Mistral Large", - "slug": "mistralai/mistral-large", + "short_name": "Ministral 8B", + "slug": "mistralai/ministral-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", - "context_length": 131072, - "created_at": "2024-11-19T01:06:55.27469+00:00", + "context_length": 8192, + "created_at": "2025-10-31T21:03:42.776557+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n", + "description": "Mistral Embed is a specialized embedding model for text data, optimized for semantic search and RAG applications. Developed by Mistral AI in late 2023, it produces 1024-dimensional vectors that effectively capture semantic relationships in text.", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -88292,7 +88457,7 @@ "training": false }, "features": { - "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -88302,7 +88467,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "4a128170-b056-42d7-8462-a5cea647f9ad", + "id": "13fc4fba-4a70-4768-8c95-4837d1919201", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -88316,41 +88481,54 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 128000, - "created_at": "2024-11-19T01:06:55.27469+00:00", + "context_length": 8192, + "created_at": "2025-10-31T21:03:42.776557+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/).\n\nIt supports dozens of languages including French, German, Spanish, Italian, Portuguese, Arabic, Hindi, Russian, Chinese, Japanese, and Korean, along with 80+ coding languages including Python, Java, C, C++, JavaScript, and Bash. Its long context window allows precise information recall from large documents.\n", - "features": {}, + "description": "Mistral Embed is a specialized embedding model for text data, optimized for semantic search and RAG applications. Developed by Mistral AI in late 2023, it produces 1024-dimensional vectors that effectively capture semantic relationships in text.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Mistral", - "has_text_output": true, + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", - "name": "Mistral Large 2407", - "output_modalities": ["text"], - "permaslug": "mistralai/mistral-large-2407", - "reasoning_config": null, + "model_version_group_id": null, + "name": "Mistral: Mistral Embed 2312", + "output_modalities": ["embeddings"], + "permaslug": "mistralai/mistral-embed-2312", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mistral Large 2407", - "slug": "mistralai/mistral-large-2407", + "short_name": "Mistral Embed 2312", + "slug": "mistralai/mistral-embed-2312", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-large-2407", - "model_variant_slug": "mistralai/mistral-large-2407", + "model_variant_permaslug": "mistralai/mistral-embed-2312", + "model_variant_slug": "mistralai/mistral-embed-2312", "moderation_required": false, - "name": "Mistral | mistralai/mistral-large-2407", + "name": "Mistral | mistralai/mistral-embed-2312", "pricing": { - "completion": "0.000006", + "completion": "0", "discount": 0, - "prompt": "0.000002" + "prompt": "0.0000001" }, "provider_display_name": "Mistral", "provider_info": { @@ -88401,7 +88579,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -88411,7 +88590,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "mistral-large-2407", + "provider_model_id": "mistral-embed-2312", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -88425,49 +88604,60 @@ "presence_penalty", "seed", "response_format", - "structured_outputs", - "tools", - "tool_choice" + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Mistral", - "has_text_output": true, + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", - "name": "Mistral Large 2407", - "output_modalities": ["text"], - "permaslug": "mistralai/mistral-large-2407", - "reasoning_config": null, + "model_version_group_id": null, + "name": "Mistral: Mistral Embed 2312", + "output_modalities": ["embeddings"], + "permaslug": "mistralai/mistral-embed-2312", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mistral Large 2407", - "slug": "mistralai/mistral-large-2407", + "short_name": "Mistral Embed 2312", + "slug": "mistralai/mistral-embed-2312", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", - "context_length": 131072, - "created_at": "2024-11-19T01:11:25.108028+00:00", + "context_length": 262144, + "created_at": "2025-12-01T21:27:52.65109+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": 0.0645, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.", + "description": "Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -88476,17 +88666,17 @@ "training": false }, "features": { - "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": false, - "id": "26f5ecd0-44cb-43e8-8cfc-7b155c2e8c05", + "id": "fa718841-07ea-4b7e-b8a3-6069545c7e6a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -88500,41 +88690,54 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 128000, - "created_at": "2024-11-19T01:11:25.108028+00:00", + "context_length": 256000, + "created_at": "2025-12-01T21:27:52.65109+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": 0.0645, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411)\n\nIt provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable improvements in long context understanding, a new system prompt, and more accurate function calling.", - "features": {}, + "description": "Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Mistral", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, - "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", - "name": "Mistral Large 2411", + "model_version_group_id": null, + "name": "Mistral: Mistral Large 3 2512", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-large-2411", - "reasoning_config": null, + "permaslug": "mistralai/mistral-large-2512", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mistral Large 2411", - "slug": "mistralai/mistral-large-2411", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Mistral Large 3 2512", + "slug": "mistralai/mistral-large-2512", + "updated_at": "2025-12-02T15:19:24.969824+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-large-2411", - "model_variant_slug": "mistralai/mistral-large-2411", + "model_variant_permaslug": "mistralai/mistral-large-2512", + "model_variant_slug": "mistralai/mistral-large-2512", "moderation_required": false, - "name": "Mistral | mistralai/mistral-large-2411", + "name": "Mistral | mistralai/mistral-large-2512", "pricing": { - "completion": "0.000006", + "completion": "0.0000015", "discount": 0, - "prompt": "0.000002" + "prompt": "0.0000005" }, "provider_display_name": "Mistral", "provider_info": { @@ -88585,7 +88788,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -88595,7 +88799,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "mistral-large-2411", + "provider_model_id": "mistral-large-2512", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -88619,39 +88823,50 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Mistral", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, - "model_version_group_id": "83129748-0564-4485-982a-d7a37a1ef3ec", - "name": "Mistral Large 2411", + "model_version_group_id": null, + "name": "Mistral: Mistral Large 3 2512", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-large-2411", - "reasoning_config": null, + "permaslug": "mistralai/mistral-large-2512", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mistral Large 2411", - "slug": "mistralai/mistral-large-2411", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Mistral Large 3 2512", + "slug": "mistralai/mistral-large-2512", + "updated_at": "2025-12-02T15:19:24.969824+00:00", "warning_message": null }, { "author": "mistralai", - "context_length": 32768, - "created_at": "2024-01-10T00:00:00+00:00", + "context_length": 131072, + "created_at": "2025-05-07T14:15:41.980763+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Note: This model is being deprecated. Recommended replacement is the newer [Ministral 8B](/mistral/ministral-8b)\n\nThis model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.", + "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -88670,7 +88885,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "f543d3e4-4a9b-43f3-9988-c19abd5246c0", + "id": "9d5ba5bf-8465-46df-9185-1330820338f5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -88684,41 +88899,41 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 32000, - "created_at": "2024-01-10T00:00:00+00:00", + "context_length": 131072, + "created_at": "2025-05-07T14:15:41.980763+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Note: This model is being deprecated. Recommended replacement is the newer [Ministral 8B](/mistral/ministral-8b)\n\nThis model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.", + "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", "features": {}, "group": "Mistral", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral Tiny", + "name": "Mistral: Mistral Medium 3", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-tiny", + "permaslug": "mistralai/mistral-medium-3", "reasoning_config": null, "router": null, - "short_name": "Mistral Tiny", - "slug": "mistralai/mistral-tiny", + "short_name": "Mistral Medium 3", + "slug": "mistralai/mistral-medium-3", "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": "This model is deprecated and slated for retirement. Please see [Ministral 8B](/mistral/ministral-8b) for the Mistral suggested upgrade." + "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-tiny", - "model_variant_slug": "mistralai/mistral-tiny", + "model_variant_permaslug": "mistralai/mistral-medium-3", + "model_variant_slug": "mistralai/mistral-medium-3", "moderation_required": false, - "name": "Mistral | mistralai/mistral-tiny", + "name": "Mistral | mistralai/mistral-medium-3", "pricing": { - "completion": "0.00000025", + "completion": "0.000002", "discount": 0, - "prompt": "0.00000025" + "prompt": "0.0000004" }, "provider_display_name": "Mistral", "provider_info": { @@ -88769,7 +88984,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -88779,7 +88995,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "open-mistral-7b", + "provider_model_id": "mistral-medium-2505", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -88809,33 +89025,33 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral Tiny", + "name": "Mistral: Mistral Medium 3", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-tiny", + "permaslug": "mistralai/mistral-medium-3", "reasoning_config": null, "router": null, - "short_name": "Mistral Tiny", - "slug": "mistralai/mistral-tiny", + "short_name": "Mistral Medium 3", + "slug": "mistralai/mistral-medium-3", "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": "This model is deprecated and slated for retirement. Please see [Ministral 8B](/mistral/ministral-8b) for the Mistral suggested upgrade." + "warning_message": null }, { "author": "mistralai", - "context_length": 256000, - "created_at": "2025-08-01T20:20:30.639517+00:00", + "context_length": 131072, + "created_at": "2025-08-13T14:33:59.459114+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Mistral's cutting-edge language model for coding released end of July 2025. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)", + "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3.1 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 256000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -88844,10 +89060,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, @@ -88858,7 +89071,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "dfdc2841-297c-4de9-96fe-eab0ccfa25a6", + "id": "13c5002b-8c5b-490c-8a5f-52ec0242804f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -88872,14 +89085,14 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 256000, - "created_at": "2025-08-01T20:20:30.639517+00:00", + "context_length": 131072, + "created_at": "2025-08-13T14:33:59.459114+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Mistral's cutting-edge language model for coding released end of July 2025. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)", + "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3.1 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", "features": { "reasoning_config": { "end_token": null, @@ -88892,31 +89105,31 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Codestral 2508", + "name": "Mistral: Mistral Medium 3.1", "output_modalities": ["text"], - "permaslug": "mistralai/codestral-2508", + "permaslug": "mistralai/mistral-medium-3.1", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Codestral 2508", - "slug": "mistralai/codestral-2508", + "short_name": "Mistral Medium 3.1", + "slug": "mistralai/mistral-medium-3.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/codestral-2508", - "model_variant_slug": "mistralai/codestral-2508", + "model_variant_permaslug": "mistralai/mistral-medium-3.1", + "model_variant_slug": "mistralai/mistral-medium-3.1", "moderation_required": false, - "name": "Mistral | mistralai/codestral-2508", + "name": "Mistral | mistralai/mistral-medium-3.1", "pricing": { - "completion": "0.0000009", + "completion": "0.000002", "discount": 0, - "prompt": "0.0000003" + "prompt": "0.0000004" }, "provider_display_name": "Mistral", "provider_info": { @@ -88967,7 +89180,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -88977,14 +89191,12 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "codestral-2508", + "provider_model_id": "mistral-medium-2508", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -88992,6 +89204,8 @@ "frequency_penalty", "presence_penalty", "seed", + "response_format", + "structured_outputs", "tools", "tool_choice" ], @@ -89013,39 +89227,37 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Codestral 2508", + "name": "Mistral: Mistral Medium 3.1", "output_modalities": ["text"], - "permaslug": "mistralai/codestral-2508", + "permaslug": "mistralai/mistral-medium-3.1", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Codestral 2508", - "slug": "mistralai/codestral-2508", + "short_name": "Mistral Medium 3.1", + "slug": "mistralai/mistral-medium-3.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", - "context_length": 8192, - "created_at": "2025-10-30T22:47:40.42301+00:00", + "context_length": 131072, + "created_at": "2024-07-19T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.3 }, - "default_stops": [], + "default_stops": ["[INST]", ""], "default_system": null, - "description": "Mistral Codestral Embed is specially designed for code, perfect for embedding code databases, repositories, and powering coding assistants with state-of-the-art retrieval.", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 8192, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -89054,7 +89266,7 @@ "training": false }, "features": { - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -89064,7 +89276,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "a93321a4-2cbf-4bd0-a0cf-09cb593fb821", + "id": "2dfff8cc-cb8c-451d-8ca4-1356ad56218d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -89078,52 +89290,39 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 8192, - "created_at": "2025-10-30T22:47:40.42301+00:00", + "context_length": 131072, + "created_at": "2024-07-19T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.3 }, - "default_stops": [], + "default_stops": ["[INST]", ""], "default_system": null, - "description": "Mistral Codestral Embed is specially designed for code, perfect for embedding code databases, repositories, and powering coding assistants with state-of-the-art retrieval.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", + "features": {}, "group": "Mistral", - "has_text_output": false, - "hf_slug": null, + "has_text_output": true, + "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Mistral: Codestral Embed 2505", - "output_modalities": ["embeddings"], - "permaslug": "mistralai/codestral-embed-2505", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "name": "Mistral: Mistral Nemo", + "output_modalities": ["text"], + "permaslug": "mistralai/mistral-nemo", + "reasoning_config": null, "router": null, - "short_name": "Codestral Embed 2505", - "slug": "mistralai/codestral-embed-2505", + "short_name": "Mistral Nemo", + "slug": "mistralai/mistral-nemo", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/codestral-embed-2505", - "model_variant_slug": "mistralai/codestral-embed-2505", + "model_variant_permaslug": "mistralai/mistral-nemo", + "model_variant_slug": "mistralai/mistral-nemo", "moderation_required": false, - "name": "Mistral | mistralai/codestral-embed-2505", + "name": "Mistral | mistralai/mistral-nemo", "pricing": { - "completion": "0", + "completion": "0.00000015", "discount": 0, "prompt": "0.00000015" }, @@ -89176,7 +89375,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -89186,7 +89386,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "codestral-embed-2505", + "provider_model_id": "open-mistral-nemo-2407", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -89200,187 +89400,45 @@ "presence_penalty", "seed", "response_format", - "structured_outputs" + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "features": {}, "group": "Mistral", - "has_text_output": false, - "hf_slug": null, - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Codestral Embed 2505", - "output_modalities": ["embeddings"], - "permaslug": "mistralai/codestral-embed-2505", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Codestral Embed 2505", - "slug": "mistralai/codestral-embed-2505", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "Other", - "context_length": 262144, - "created_at": "2025-08-26T20:08:47.000Z", - "default_parameters": null, - "default_stops": [], - "default_system": null, - "description": "Devstral 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 123B-parameter dense transformer model supporting a 256K context window. It is provided free of charge in Kilo Code for a limited time.\n**Note:** prompts and completions may be logged by Mistral during the free period and used to improve the model.", - "endpoint": { - "adapter_name": "other", - "can_abort": true, - "context_length": 262144, - "data_policy": { - "canPublish": false, - "retainsPrompts": true, - "training": true - }, - "features": null, - "has_chat_completions": true, - "has_completions": false, - "id": "mistralai/devstral-2512:free", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": true, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 262144, - "max_prompt_images": null, - "max_prompt_tokens": 262144, - "max_tokens_per_image": null, - "model": { - "author": "Other", - "context_length": 262144, - "created_at": "2025-08-26T20:08:47.000Z", - "default_parameters": null, - "default_stops": [], - "default_system": null, - "description": "Devstral 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 123B-parameter dense transformer model supporting a 256K context window. It is provided free of charge in Kilo Code for a limited time.\n**Note:** prompts and completions may be logged by Mistral during the free period and used to improve the model.", - "features": null, - "group": "other", - "has_text_output": true, - "hf_slug": null, - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Devstral 2 2512 (free)", - "output_modalities": ["text"], - "permaslug": "mistralai/devstral-2512:free", - "reasoning_config": null, - "router": null, - "short_name": "Mistral: Devstral 2 2512 (free)", - "slug": "mistralai/devstral-2512:free", - "updated_at": "2026-01-13T12:11:27.631Z", - "warning_message": null - }, - "model_variant_permaslug": "mistralai/devstral-2512:free", - "model_variant_slug": "mistralai/devstral-2512:free", - "moderation_required": false, - "name": "Mistral: Devstral 2 2512 (free)", - "pricing": { - "completion": "0.0000000", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000000", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "Other", - "provider_info": { - "adapterName": "other", - "baseUrl": "https://kilo.ai", - "byokEnabled": false, - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": true - }, - "displayName": "Other", - "editors": [], - "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "Unknown", - "icon": { - "className": "rounded-sm", - "url": "https://via.placeholder.com/32x32/000000/FFFFFF?text=S" - }, - "ignoredProviderModels": [], - "isAbortable": true, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Other", - "owners": [], - "slug": "other", - "statusPageUrl": null - }, - "provider_model_id": "mistralai/devstral-2512:free", - "provider_name": "Other", - "provider_region": null, - "provider_slug": "other", - "quantization": null, - "supported_parameters": ["max_tokens", "temperature", "tools"], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "default" - }, - "features": null, - "group": "other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Mistral: Devstral 2 2512 (free)", + "name": "Mistral: Mistral Nemo", "output_modalities": ["text"], - "permaslug": "mistralai/devstral-2512:free", + "permaslug": "mistralai/mistral-nemo", "reasoning_config": null, "router": null, - "short_name": "Mistral: Devstral 2 2512 (free)", - "slug": "mistralai/devstral-2512", - "updated_at": "2026-01-13T12:11:27.631Z", + "short_name": "Mistral Nemo", + "slug": "mistralai/mistral-nemo", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", "context_length": 131072, - "created_at": "2025-07-10T15:28:41.981407+00:00", + "created_at": "2025-06-20T18:10:16.960494+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI. Positioned as a step up from Devstral Small, it achieves 61.6% on SWE-Bench Verified, placing it ahead of Gemini 2.5 Pro and GPT-4.1 in code-related tasks, at a fraction of the cost. It is designed for generalization across prompt styles and tool use in code agents and frameworks.\n\nDevstral Medium is available via API only (not open-weight), and supports enterprise deployment on private infrastructure, with optional fine-tuning capabilities.", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, @@ -89393,7 +89451,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -89403,65 +89460,55 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "8f47daf9-62e7-423d-96de-fcb241b39175", + "id": "23fd6b91-74d8-45fc-ac00-cf141ec7f4a5", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 100, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 131072, - "created_at": "2025-07-10T15:28:41.981407+00:00", + "context_length": 128000, + "created_at": "2025-06-20T18:10:16.960494+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI. Positioned as a step up from Devstral Small, it achieves 61.6% on SWE-Bench Verified, placing it ahead of Gemini 2.5 Pro and GPT-4.1 in code-related tasks, at a fraction of the cost. It is designed for generalization across prompt styles and tool use in code agents and frameworks.\n\nDevstral Medium is available via API only (not open-weight), and supports enterprise deployment on private infrastructure, with optional fine-tuning capabilities.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": null, + "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Devstral Medium", + "name": "Mistral: Mistral Small 3.2 24B", "output_modalities": ["text"], - "permaslug": "mistralai/devstral-medium-2507", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "reasoning_config": null, "router": null, - "short_name": "Devstral Medium", - "slug": "mistralai/devstral-medium", + "short_name": "Mistral Small 3.2 24B", + "slug": "mistralai/mistral-small-3.2-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/devstral-medium-2507", - "model_variant_slug": "mistralai/devstral-medium", + "model_variant_permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "model_variant_slug": "mistralai/mistral-small-3.2-24b-instruct", "moderation_required": false, - "name": "Mistral | mistralai/devstral-medium-2507", + "name": "Mistral | mistralai/mistral-small-3.2-24b-instruct-2506", "pricing": { - "completion": "0.000002", + "completion": "0.0000003", "discount": 0, - "prompt": "0.0000004" + "prompt": "0.0000001" }, "provider_display_name": "Mistral", "provider_info": { @@ -89512,7 +89559,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -89522,7 +89570,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "devstral-medium-2507", + "provider_model_id": "mistral-small-2506", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -89546,49 +89594,41 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": null, + "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Devstral Medium", + "name": "Mistral: Mistral Small 3.2 24B", "output_modalities": ["text"], - "permaslug": "mistralai/devstral-medium-2507", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "reasoning_config": null, "router": null, - "short_name": "Devstral Medium", - "slug": "mistralai/devstral-medium", + "short_name": "Mistral Small 3.2 24B", + "slug": "mistralai/mistral-small-3.2-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", - "context_length": 131072, - "created_at": "2025-07-10T15:19:11.726916+00:00", + "context_length": 32768, + "created_at": "2025-12-16T18:10:53.032181+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": 0.3, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and released under the Apache 2.0 license, it features a 128k token context window and supports both Mistral-style function calling and XML output formats.\n\nDesigned for agentic coding workflows, Devstral Small 1.1 is optimized for tasks such as codebase exploration, multi-file edits, and integration into autonomous development agents like OpenHands and Cline. It achieves 53.6% on SWE-Bench Verified, surpassing all other open models on this benchmark, while remaining lightweight enough to run on a single 4090 GPU or Apple silicon machine. The model uses a Tekken tokenizer with a 131k vocabulary and is deployable via vLLM, Transformers, Ollama, LM Studio, and other OpenAI-compatible runtimes.\n", + "description": "Mistral Small Creative is an experimental small model designed for creative writing, narrative generation, roleplay and character-driven dialogue, general-purpose instruction following, and conversational agents.", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -89597,17 +89637,16 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": false, - "id": "768e136a-0758-4c83-b1a7-701cb57da9e7", + "id": "0118a65e-79f9-48b0-924e-38c4a41378e1", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -89621,15 +89660,20 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 131072, - "created_at": "2025-07-10T15:19:11.726916+00:00", + "context_length": 32768, + "created_at": "2025-12-16T18:10:53.032181+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": 0.3, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and released under the Apache 2.0 license, it features a 128k token context window and supports both Mistral-style function calling and XML output formats.\n\nDesigned for agentic coding workflows, Devstral Small 1.1 is optimized for tasks such as codebase exploration, multi-file edits, and integration into autonomous development agents like OpenHands and Cline. It achieves 53.6% on SWE-Bench Verified, surpassing all other open models on this benchmark, while remaining lightweight enough to run on a single 4090 GPU or Apple silicon machine. The model uses a Tekken tokenizer with a 131k vocabulary and is deployable via vLLM, Transformers, Ollama, LM Studio, and other OpenAI-compatible runtimes.\n", + "description": "Mistral Small Creative is an experimental small model designed for creative writing, narrative generation, roleplay and character-driven dialogue, general-purpose instruction following, and conversational agents.", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, @@ -89638,30 +89682,30 @@ }, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Devstral-Small-2507", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Devstral Small 1.1", + "name": "Mistral: Mistral Small Creative", "output_modalities": ["text"], - "permaslug": "mistralai/devstral-small-2507", + "permaslug": "mistralai/mistral-small-creative-20251216", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Devstral Small 1.1", - "slug": "mistralai/devstral-small", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Mistral Small Creative", + "slug": "mistralai/mistral-small-creative", + "updated_at": "2025-12-16T20:02:56.287656+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/devstral-small-2507", - "model_variant_slug": "mistralai/devstral-small", + "model_variant_permaslug": "mistralai/mistral-small-creative-20251216", + "model_variant_slug": "mistralai/mistral-small-creative", "moderation_required": false, - "name": "Mistral | mistralai/devstral-small-2507", + "name": "Mistral | mistralai/mistral-small-creative-20251216", "pricing": { "completion": "0.0000003", "discount": 0, @@ -89716,7 +89760,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -89726,24 +89771,12 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "devstral-small-2507", + "provider_model_id": "labs-mistral-small-creative", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", "quantization": "unknown", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "response_format", - "structured_outputs", - "tools", - "tool_choice" - ], + "supported_parameters": ["tools", "tool_choice"], "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": true, @@ -89751,6 +89784,9 @@ "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, @@ -89759,175 +89795,40 @@ }, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Devstral-Small-2507", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Devstral Small 1.1", + "name": "Mistral: Mistral Small Creative", "output_modalities": ["text"], - "permaslug": "mistralai/devstral-small-2507", + "permaslug": "mistralai/mistral-small-creative-20251216", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Devstral Small 1.1", - "slug": "mistralai/devstral-small", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "Other", - "context_length": 262144, - "created_at": "2025-08-26T20:08:47.000Z", - "default_parameters": null, - "default_stops": [], - "default_system": null, - "description": "Devstral Small 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 24B-parameter dense transformer model supporting a 256K context window.\n**Note:** prompts and completions may be logged by Mistral during the free period and used to improve the model.", - "endpoint": { - "adapter_name": "other", - "can_abort": true, - "context_length": 262144, - "data_policy": { - "canPublish": false, - "retainsPrompts": true, - "training": true - }, - "features": null, - "has_chat_completions": true, - "has_completions": false, - "id": "mistralai/devstral-small-2512:free", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": true, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 262144, - "max_prompt_images": null, - "max_prompt_tokens": 262144, - "max_tokens_per_image": null, - "model": { - "author": "Other", - "context_length": 262144, - "created_at": "2025-08-26T20:08:47.000Z", - "default_parameters": null, - "default_stops": [], - "default_system": null, - "description": "Devstral Small 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 24B-parameter dense transformer model supporting a 256K context window.\n**Note:** prompts and completions may be logged by Mistral during the free period and used to improve the model.", - "features": null, - "group": "other", - "has_text_output": true, - "hf_slug": null, - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Devstral Small 2 2512 (free)", - "output_modalities": ["text"], - "permaslug": "mistralai/devstral-small-2512:free", - "reasoning_config": null, - "router": null, - "short_name": "Mistral: Devstral Small 2 2512 (free)", - "slug": "mistralai/devstral-small-2512:free", - "updated_at": "2026-01-13T12:11:27.631Z", - "warning_message": null - }, - "model_variant_permaslug": "mistralai/devstral-small-2512:free", - "model_variant_slug": "mistralai/devstral-small-2512:free", - "moderation_required": false, - "name": "Mistral: Devstral Small 2 2512 (free)", - "pricing": { - "completion": "0.0000000", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000000", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "Other", - "provider_info": { - "adapterName": "other", - "baseUrl": "https://kilo.ai", - "byokEnabled": false, - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": true - }, - "displayName": "Other", - "editors": [], - "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "Unknown", - "icon": { - "className": "rounded-sm", - "url": "https://via.placeholder.com/32x32/000000/FFFFFF?text=S" - }, - "ignoredProviderModels": [], - "isAbortable": true, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Other", - "owners": [], - "slug": "other", - "statusPageUrl": null - }, - "provider_model_id": "mistralai/devstral-small-2512:free", - "provider_name": "Other", - "provider_region": null, - "provider_slug": "other", - "quantization": null, - "supported_parameters": ["max_tokens", "temperature", "tools"], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "default" - }, - "features": null, - "group": "other", - "has_text_output": true, - "hf_slug": null, - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Devstral Small 2 2512 (free)", - "output_modalities": ["text"], - "permaslug": "mistralai/devstral-small-2512:free", - "reasoning_config": null, - "router": null, - "short_name": "Mistral: Devstral Small 2 2512 (free)", - "slug": "mistralai/devstral-small-2512", - "updated_at": "2026-01-13T12:11:27.631Z", + "short_name": "Mistral Small Creative", + "slug": "mistralai/mistral-small-creative", + "updated_at": "2025-12-16T20:02:56.287656+00:00", "warning_message": null }, { "author": "mistralai", - "context_length": 262144, - "created_at": "2025-12-02T13:22:15.851192+00:00", + "context_length": 65536, + "created_at": "2024-04-17T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": 0.3, - "top_p": null + "temperature": 0.3 }, - "default_stops": [], + "default_stops": ["[INST]", ""], "default_system": null, - "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.", + "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 65536, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -89936,7 +89837,6 @@ "training": false }, "features": { - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -89946,7 +89846,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "0b53717b-9af5-4de3-80af-ebedd2adf981", + "id": "e55a0c16-08a0-4871-ae6a-97482cc3231a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -89960,54 +89860,41 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 128000, - "created_at": "2025-12-02T13:22:15.851192+00:00", + "context_length": 65536, + "created_at": "2024-04-17T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": 0.3, - "top_p": null + "temperature": 0.3 }, - "default_stops": [], + "default_stops": ["[INST]", ""], "default_system": null, - "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe", + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Ministral-3-14B-Instruct-2512", + "hf_slug": "mistralai/Mixtral-8x22B-Instruct-v0.1", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Mistral: Ministral 3 14B 2512", + "name": "Mistral: Mixtral 8x22B Instruct", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-14b-2512", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mixtral-8x22b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Ministral 3 14B 2512", - "slug": "mistralai/ministral-14b-2512", - "updated_at": "2025-12-10T16:54:21.432818+00:00", + "short_name": "Mixtral 8x22B Instruct", + "slug": "mistralai/mixtral-8x22b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/ministral-14b-2512", - "model_variant_slug": "mistralai/ministral-14b-2512", + "model_variant_permaslug": "mistralai/mixtral-8x22b-instruct", + "model_variant_slug": "mistralai/mixtral-8x22b-instruct", "moderation_required": false, - "name": "Mistral | mistralai/ministral-14b-2512", + "name": "Mistral | mistralai/mixtral-8x22b-instruct", "pricing": { - "completion": "0.0000002", + "completion": "0.000006", "discount": 0, - "prompt": "0.0000002" + "prompt": "0.000002" }, "provider_display_name": "Mistral", "provider_info": { @@ -90058,7 +89945,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -90068,7 +89956,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "ministral-14b-2512", + "provider_model_id": "open-mixtral-8x22b", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -90092,48 +89980,35 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Ministral-3-14B-Instruct-2512", + "hf_slug": "mistralai/Mixtral-8x22B-Instruct-v0.1", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Mistral: Ministral 3 14B 2512", + "name": "Mistral: Mixtral 8x22B Instruct", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-14b-2512", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mixtral-8x22b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Ministral 3 14B 2512", - "slug": "mistralai/ministral-14b-2512", - "updated_at": "2025-12-10T16:54:21.432818+00:00", + "short_name": "Mixtral 8x22B Instruct", + "slug": "mistralai/mixtral-8x22b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", "context_length": 131072, - "created_at": "2025-12-02T13:19:20.726635+00:00", + "created_at": "2024-09-10T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": 0.3, - "top_p": null + "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "The smallest model in the Ministral 3 family, Ministral 3 3B is a powerful, efficient tiny language model with vision capabilities.", + "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, @@ -90146,7 +90021,7 @@ "training": false }, "features": { - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -90156,7 +90031,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "b9f6828f-aef9-498a-91f5-6816fbf72420", + "id": "e1d77a4e-3a3d-4dc4-9039-4d311dcb4dba", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -90170,54 +90045,41 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 128000, - "created_at": "2025-12-02T13:19:20.726635+00:00", + "context_length": 4096, + "created_at": "2024-09-10T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": 0.3, - "top_p": null + "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "The smallest model in the Ministral 3 family, Ministral 3 3B is a powerful, efficient tiny language model with vision capabilities.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Ministral-3-3B-Instruct-2512", + "hf_slug": "mistralai/Pixtral-12B-2409", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 3 3B 2512", + "name": "Mistral: Pixtral 12B", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-3b-2512", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/pixtral-12b", + "reasoning_config": null, "router": null, - "short_name": "Ministral 3 3B 2512", - "slug": "mistralai/ministral-3b-2512", - "updated_at": "2025-12-10T16:53:53.577361+00:00", + "short_name": "Pixtral 12B", + "slug": "mistralai/pixtral-12b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/ministral-3b-2512", - "model_variant_slug": "mistralai/ministral-3b-2512", + "model_variant_permaslug": "mistralai/pixtral-12b", + "model_variant_slug": "mistralai/pixtral-12b", "moderation_required": false, - "name": "Mistral | mistralai/ministral-3b-2512", + "name": "Mistral | mistralai/pixtral-12b", "pricing": { - "completion": "0.0000001", + "completion": "0.00000015", "discount": 0, - "prompt": "0.0000001" + "prompt": "0.00000015" }, "provider_display_name": "Mistral", "provider_info": { @@ -90268,7 +90130,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -90278,7 +90141,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "ministral-3b-2512", + "provider_model_id": "pixtral-12b-2409", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -90293,8 +90156,8 @@ "seed", "response_format", "structured_outputs", - "tool_choice", - "tools" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, @@ -90302,52 +90165,39 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Ministral-3-3B-Instruct-2512", + "hf_slug": "mistralai/Pixtral-12B-2409", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 3 3B 2512", + "name": "Mistral: Pixtral 12B", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-3b-2512", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/pixtral-12b", + "reasoning_config": null, "router": null, - "short_name": "Ministral 3 3B 2512", - "slug": "mistralai/ministral-3b-2512", - "updated_at": "2025-12-10T16:53:53.577361+00:00", + "short_name": "Pixtral 12B", + "slug": "mistralai/pixtral-12b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", - "context_length": 262144, - "created_at": "2025-12-02T13:20:54.103183+00:00", + "context_length": 131072, + "created_at": "2024-11-19T00:49:48.873161+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": 0.3, - "top_p": null + "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "A balanced model in the Ministral 3 family, Ministral 3 8B is a powerful, efficient tiny language model with vision capabilities.", + "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -90356,7 +90206,7 @@ "training": false }, "features": { - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -90366,7 +90216,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "c85d26e1-1e0c-449b-9775-2afce7ae510b", + "id": "1a41639e-c1cf-422e-a871-27bc67f03928", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -90381,53 +90231,40 @@ "model": { "author": "mistralai", "context_length": 128000, - "created_at": "2025-12-02T13:20:54.103183+00:00", + "created_at": "2024-11-19T00:49:48.873161+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": 0.3, - "top_p": null + "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "A balanced model in the Ministral 3 family, Ministral 3 8B is a powerful, efficient tiny language model with vision capabilities.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Ministral-3-8B-Instruct-2512", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 3 8B 2512", + "name": "Mistral: Pixtral Large 2411", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-8b-2512", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/pixtral-large-2411", + "reasoning_config": null, "router": null, - "short_name": "Ministral 3 8B 2512", - "slug": "mistralai/ministral-8b-2512", - "updated_at": "2025-12-10T16:54:03.715606+00:00", + "short_name": "Pixtral Large 2411", + "slug": "mistralai/pixtral-large-2411", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/ministral-8b-2512", - "model_variant_slug": "mistralai/ministral-8b-2512", + "model_variant_permaslug": "mistralai/pixtral-large-2411", + "model_variant_slug": "mistralai/pixtral-large-2411", "moderation_required": false, - "name": "Mistral | mistralai/ministral-8b-2512", + "name": "Mistral | mistralai/pixtral-large-2411", "pricing": { - "completion": "0.00000015", + "completion": "0.000006", "discount": 0, - "prompt": "0.00000015" + "prompt": "0.000002" }, "provider_display_name": "Mistral", "provider_info": { @@ -90478,7 +90315,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -90488,7 +90326,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "ministral-8b-2512", + "provider_model_id": "pixtral-large-2411", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -90512,50 +90350,39 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, + "features": {}, "group": "Mistral", "has_text_output": true, - "hf_slug": "mistralai/Ministral-3-8B-Instruct-2512", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 3 8B 2512", + "name": "Mistral: Pixtral Large 2411", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-8b-2512", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/pixtral-large-2411", + "reasoning_config": null, "router": null, - "short_name": "Ministral 3 8B 2512", - "slug": "mistralai/ministral-8b-2512", - "updated_at": "2025-12-10T16:54:03.715606+00:00", + "short_name": "Pixtral Large 2411", + "slug": "mistralai/pixtral-large-2411", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", - "context_length": 131072, - "created_at": "2024-10-17T00:00:00+00:00", + "context_length": 32768, + "created_at": "2025-02-17T14:40:39.116446+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Ministral 3B is a 3B parameter model optimized for on-device and edge computing. It excels in knowledge, commonsense reasoning, and function-calling, outperforming larger models like Mistral 7B on most benchmarks. Supporting up to 128k context length, it’s ideal for orchestrating agentic workflows and specialist tasks with efficient inference.", + "description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional datasets, it supports multiple Indian-origin languages—including Tamil and Malayalam—alongside Arabic. This makes it a versatile option for a range of regional and multilingual applications. Read more at the blog post [here](https://mistral.ai/en/news/mistral-saba)", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -90574,7 +90401,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "71ce5f13-6ef2-4cbe-a650-66d2f11a4ecb", + "id": "c5a8bebe-8564-47ed-9d05-4151aa3c6e3a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -90588,14 +90415,14 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 128000, - "created_at": "2024-10-17T00:00:00+00:00", + "context_length": 32000, + "created_at": "2025-02-17T14:40:39.116446+00:00", "default_parameters": { "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Ministral 3B is a 3B parameter model optimized for on-device and edge computing. It excels in knowledge, commonsense reasoning, and function-calling, outperforming larger models like Mistral 7B on most benchmarks. Supporting up to 128k context length, it’s ideal for orchestrating agentic workflows and specialist tasks with efficient inference.", + "description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional datasets, it supports multiple Indian-origin languages—including Tamil and Malayalam—alongside Arabic. This makes it a versatile option for a range of regional and multilingual applications. Read more at the blog post [here](https://mistral.ai/en/news/mistral-saba)", "features": {}, "group": "Mistral", "has_text_output": true, @@ -90605,24 +90432,24 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 3B", + "name": "Mistral: Saba", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-3b", + "permaslug": "mistralai/mistral-saba-2502", "reasoning_config": null, "router": null, - "short_name": "Ministral 3B", - "slug": "mistralai/ministral-3b", + "short_name": "Saba", + "slug": "mistralai/mistral-saba", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/ministral-3b", - "model_variant_slug": "mistralai/ministral-3b", + "model_variant_permaslug": "mistralai/mistral-saba-2502", + "model_variant_slug": "mistralai/mistral-saba", "moderation_required": false, - "name": "Mistral | mistralai/ministral-3b", + "name": "Mistral | mistralai/mistral-saba-2502", "pricing": { - "completion": "0.00000004", + "completion": "0.0000006", "discount": 0, - "prompt": "0.00000004" + "prompt": "0.0000002" }, "provider_display_name": "Mistral", "provider_info": { @@ -90673,7 +90500,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -90683,7 +90511,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "ministral-3b-2410", + "provider_model_id": "mistral-saba-2502", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -90716,30 +90544,32 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 3B", + "name": "Mistral: Saba", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-3b", + "permaslug": "mistralai/mistral-saba-2502", "reasoning_config": null, "router": null, - "short_name": "Ministral 3B", - "slug": "mistralai/ministral-3b", + "short_name": "Saba", + "slug": "mistralai/mistral-saba", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "mistralai", - "context_length": 131072, - "created_at": "2024-10-17T00:00:00+00:00", + "context_length": 32000, + "created_at": "2025-10-30T14:39:04+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": 0.2, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Ministral 8B is an 8B parameter model featuring a unique interleaved sliding-window attention pattern for faster, memory-efficient inference. Designed for edge use cases, it supports up to 128k context length and excels in knowledge and reasoning tasks. It outperforms peers in the sub-10B category, making it perfect for low-latency, privacy-first applications.", + "description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. Input audio is priced at $100 per million seconds.", "endpoint": { "adapter_name": "MistralAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 32000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", @@ -90748,6 +90578,8 @@ "training": false }, "features": { + "supported_parameters": {}, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -90757,7 +90589,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "c70696c7-73c0-47c3-96b4-20c44b17101b", + "id": "814f9be0-e226-4b8c-bb5d-bf684359da13", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -90771,39 +90603,52 @@ "max_tokens_per_image": null, "model": { "author": "mistralai", - "context_length": 128000, - "created_at": "2024-10-17T00:00:00+00:00", + "context_length": 32000, + "created_at": "2025-10-30T14:39:04+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": 0.2, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Ministral 8B is an 8B parameter model featuring a unique interleaved sliding-window attention pattern for faster, memory-efficient inference. Designed for edge use cases, it supports up to 128k context length and excels in knowledge and reasoning tasks. It outperforms peers in the sub-10B category, making it perfect for low-latency, privacy-first applications.", - "features": {}, + "description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. Input audio is priced at $100 per million seconds.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Mistral", "has_text_output": true, - "hf_slug": null, + "hf_slug": "mistralai/Voxtral-Small-24B-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "audio"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 8B", + "name": "Mistral: Voxtral Small 24B 2507", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-8b", - "reasoning_config": null, + "permaslug": "mistralai/voxtral-small-24b-2507", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Ministral 8B", - "slug": "mistralai/ministral-8b", + "short_name": "Voxtral Small 24B 2507", + "slug": "mistralai/voxtral-small-24b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/ministral-8b", - "model_variant_slug": "mistralai/ministral-8b", + "model_variant_permaslug": "mistralai/voxtral-small-24b-2507", + "model_variant_slug": "mistralai/voxtral-small-24b-2507", "moderation_required": false, - "name": "Mistral | mistralai/ministral-8b", + "name": "Mistral | mistralai/voxtral-small-24b-2507", "pricing": { - "completion": "0.0000001", + "completion": "0.0000003", "discount": 0, "prompt": "0.0000001" }, @@ -90856,7 +90701,8 @@ "codestral-embed", "codestral-2501", "mistral-small-2501", - "mistral-ocr-2512" + "mistral-ocr-2512", + "labs-devstral-small-2512" ], "isAbortable": false, "isMultipartSupported": true, @@ -90866,7 +90712,7 @@ "slug": "mistral", "statusPageUrl": "https://status.mistral.ai/" }, - "provider_model_id": "ministral-8b-2410", + "provider_model_id": "voxtral-small-2507", "provider_name": "Mistral", "provider_region": null, "provider_slug": "mistral", @@ -90890,50 +90736,73 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Mistral", "has_text_output": true, - "hf_slug": null, + "hf_slug": "mistralai/Voxtral-Small-24B-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "audio"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 8B", + "name": "Mistral: Voxtral Small 24B 2507", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-8b", - "reasoning_config": null, + "permaslug": "mistralai/voxtral-small-24b-2507", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Ministral 8B", - "slug": "mistralai/ministral-8b", + "short_name": "Voxtral Small 24B 2507", + "slug": "mistralai/voxtral-small-24b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, + } + ], + "name": "Mistral", + "slug": "mistral" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "datacenters": ["US"], + "displayName": "ModelRun", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://api.runmodelrun.com&size=256" + }, + "models": [ { - "author": "mistralai", - "context_length": 8192, - "created_at": "2025-10-31T21:03:42.776557+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral Embed is a specialized embedding model for text data, optimized for semantic search and RAG applications. Developed by Mistral AI in late 2023, it produces 1024-dimensional vectors that effectively capture semantic relationships in text.", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "endpoint": { - "adapter_name": "MistralAdapter", - "can_abort": false, - "context_length": 8192, + "adapter_name": "OpenAIAdapter", + "can_abort": true, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://www.runmodelrun.com/privacy-policy.html", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.runmodelrun.com/TOS.html", "training": false }, "features": { - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -90942,8 +90811,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "13fc4fba-4a70-4768-8c95-4837d1919201", + "has_completions": true, + "id": "a3938710-1b99-4a1f-85f4-2494f3f659d3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -90952,134 +90821,89 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 163840, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 8192, - "created_at": "2025-10-31T21:03:42.776557+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral Embed is a specialized embedding model for text data, optimized for semantic search and RAG applications. Developed by Mistral AI in late 2023, it produces 1024-dimensional vectors that effectively capture semantic relationships in text.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Mistral", - "has_text_output": false, - "hf_slug": null, + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "features": {}, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Mistral Embed 2312", - "output_modalities": ["embeddings"], - "permaslug": "mistralai/mistral-embed-2312", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "Mistral Embed 2312", - "slug": "mistralai/mistral-embed-2312", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-embed-2312", - "model_variant_slug": "mistralai/mistral-embed-2312", + "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", + "model_variant_slug": "deepseek/deepseek-chat-v3-0324", "moderation_required": false, - "name": "Mistral | mistralai/mistral-embed-2312", + "name": "ModelRun | deepseek/deepseek-chat-v3-0324", "pricing": { - "completion": "0", + "completion": "0.00000077", "discount": 0, - "prompt": "0.0000001" + "prompt": "0.0000002" }, - "provider_display_name": "Mistral", + "provider_display_name": "ModelRun", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", - "byokEnabled": true, + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api2.runmodelrun.com/v1", + "byokEnabled": false, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://www.runmodelrun.com/privacy-policy.html", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.runmodelrun.com/TOS.html", "training": false }, - "displayName": "Mistral", - "editors": ["{}"], + "displayName": "ModelRun", + "editors": [], "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "FR", + "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "/images/icons/Mistral.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://api.runmodelrun.com&size=256" }, - "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" - ], - "isAbortable": false, + "ignoredProviderModels": [], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "ModelRun", + "owners": ["org_398AY9gETwDQkj8n6SXr9rA6HL0"], + "slug": "modelrun", + "statusPageUrl": null }, - "provider_model_id": "mistral-embed-2312", - "provider_name": "Mistral", + "provider_model_id": "deepseek/deepseek-v3-0324", + "provider_name": "ModelRun", "provider_region": null, - "provider_slug": "mistral", - "quantization": "unknown", + "provider_slug": "modelrun/fp4", + "quantization": "fp4", "supported_parameters": [ "max_tokens", "temperature", - "top_p", - "stop", - "frequency_penalty", "presence_penalty", - "seed", - "response_format", - "structured_outputs" + "repetition_penalty", + "frequency_penalty", + "top_p", + "top_k", + "min_p", + "stop" ], "supports_multipart": true, "supports_reasoning": false, @@ -91087,269 +90911,247 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Mistral", - "has_text_output": false, - "hf_slug": null, + "features": {}, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Mistral Embed 2312", - "output_modalities": ["embeddings"], - "permaslug": "mistralai/mistral-embed-2312", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "Mistral Embed 2312", - "slug": "mistralai/mistral-embed-2312", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 262144, - "created_at": "2025-12-01T21:27:52.65109+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.0645, + "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "endpoint": { - "adapter_name": "MistralAdapter", - "can_abort": false, - "context_length": 262144, + "adapter_name": "OpenAIAdapter", + "can_abort": true, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://www.runmodelrun.com/privacy-policy.html", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.runmodelrun.com/TOS.html", "training": false }, "features": { + "is_mandatory_reasoning": true, "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, + "literal_none": true, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": false, - "id": "fa718841-07ea-4b7e-b8a3-6069545c7e6a", + "has_completions": true, + "id": "1df69e91-1f72-4965-9fb3-be86cc8edc77", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 163840, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 256000, - "created_at": "2025-12-01T21:27:52.65109+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.0645, + "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Mistral", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Mistral: Mistral Large 3 2512", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-large-2512", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Mistral Large 3 2512", - "slug": "mistralai/mistral-large-2512", - "updated_at": "2025-12-02T15:19:24.969824+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-large-2512", - "model_variant_slug": "mistralai/mistral-large-2512", + "model_variant_permaslug": "deepseek/deepseek-r1-0528:free", + "model_variant_slug": "deepseek/deepseek-r1-0528:free", "moderation_required": false, - "name": "Mistral | mistralai/mistral-large-2512", + "name": "ModelRun | deepseek/deepseek-r1-0528:free", "pricing": { - "completion": "0.0000015", + "completion": "0", "discount": 0, - "prompt": "0.0000005" + "prompt": "0" }, - "provider_display_name": "Mistral", + "provider_display_name": "ModelRun", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", - "byokEnabled": true, + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api2.runmodelrun.com/v1", + "byokEnabled": false, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://www.runmodelrun.com/privacy-policy.html", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.runmodelrun.com/TOS.html", "training": false }, - "displayName": "Mistral", - "editors": ["{}"], + "displayName": "ModelRun", + "editors": [], "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "FR", + "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "/images/icons/Mistral.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://api.runmodelrun.com&size=256" }, - "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" - ], - "isAbortable": false, + "ignoredProviderModels": [], + "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "ModelRun", + "owners": ["org_398AY9gETwDQkj8n6SXr9rA6HL0"], + "slug": "modelrun", + "statusPageUrl": null }, - "provider_model_id": "mistral-large-2512", - "provider_name": "Mistral", + "provider_model_id": "deepseek/deepseek-r1-0528", + "provider_name": "ModelRun", "provider_region": null, - "provider_slug": "mistral", - "quantization": "unknown", + "provider_slug": "modelrun/fp4", + "quantization": "fp4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", - "top_p", - "stop", - "frequency_penalty", "presence_penalty", - "seed", - "response_format", - "structured_outputs", - "tools", - "tool_choice" + "repetition_penalty", + "frequency_penalty", + "top_p", + "top_k", + "min_p", + "stop" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Mistral", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Mistral: Mistral Large 3 2512", + "name": "DeepSeek: R1 0528 (free)", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-large-2512", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Mistral Large 3 2512", - "slug": "mistralai/mistral-large-2512", - "updated_at": "2025-12-02T15:19:24.969824+00:00", + "short_name": "R1 0528 (free)", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null - }, + } + ], + "name": "ModelRun", + "slug": "modelrun" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "datacenters": ["SG"], + "displayName": "Moonshot AI", + "headquarters": "SG", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://moonshot.ai&size=256" + }, + "models": [ { - "author": "mistralai", + "author": "moonshotai", "context_length": 131072, - "created_at": "2025-05-07T14:15:41.980763+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "created_at": "2025-07-11T19:47:32.565514+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { - "adapter_name": "MistralAdapter", + "adapter_name": "MoonshotAdapter", "can_abort": false, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -91359,132 +91161,112 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "9d5ba5bf-8465-46df-9185-1330820338f5", + "id": "92d2b1ae-c007-4919-840e-16b2185bcf91", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 50, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", + "author": "moonshotai", "context_length": 131072, - "created_at": "2025-05-07T14:15:41.980763+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "created_at": "2025-07-11T19:47:32.565514+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", - "features": {}, - "group": "Mistral", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Medium 3", + "name": "MoonshotAI: Kimi K2 0711", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-medium-3", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mistral Medium 3", - "slug": "mistralai/mistral-medium-3", + "short_name": "Kimi K2 0711", + "slug": "moonshotai/kimi-k2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-medium-3", - "model_variant_slug": "mistralai/mistral-medium-3", + "model_variant_permaslug": "moonshotai/kimi-k2", + "model_variant_slug": "moonshotai/kimi-k2", "moderation_required": false, - "name": "Mistral | mistralai/mistral-medium-3", + "name": "Moonshot AI | moonshotai/kimi-k2", "pricing": { - "completion": "0.000002", + "completion": "0.0000025", "discount": 0, - "prompt": "0.0000004" + "input_cache_read": "0.00000015", + "prompt": "0.0000006" }, - "provider_display_name": "Mistral", + "provider_display_name": "Moonshot AI", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "MoonshotAdapter", + "baseUrl": "https://api.moonshot.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", "training": false }, - "displayName": "Mistral", - "editors": ["{}"], + "displayName": "Moonshot AI", + "editors": [], "hasChatCompletions": true, "hasCompletions": false, - "headquarters": "FR", + "headquarters": "SG", "icon": { - "url": "/images/icons/Mistral.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://moonshot.ai&size=256" }, - "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" - ], + "ignoredProviderModels": [], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Moonshot AI", + "owners": [], + "slug": "moonshotai", + "statusPageUrl": null }, - "provider_model_id": "mistral-medium-2505", - "provider_name": "Mistral", + "provider_model_id": "kimi-k2-0711-preview", + "provider_name": "Moonshot AI", "provider_region": null, - "provider_slug": "mistral", - "quantization": "unknown", + "provider_slug": "moonshotai/fp8", + "quantization": "fp8", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "response_format", - "structured_outputs", "tools", - "tool_choice" + "tool_choice", + "logprobs", + "top_logprobs" ], "supports_multipart": true, "supports_reasoning": false, @@ -91492,48 +91274,60 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Medium 3", + "name": "MoonshotAI: Kimi K2 0711", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-medium-3", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mistral Medium 3", - "slug": "mistralai/mistral-medium-3", + "short_name": "Kimi K2 0711", + "slug": "moonshotai/kimi-k2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 131072, - "created_at": "2025-08-13T14:33:59.459114+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3.1 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { - "adapter_name": "MistralAdapter", + "adapter_name": "MoonshotAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_implicit_caching": true, "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, @@ -91544,28 +91338,26 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "13c5002b-8c5b-490c-8a5f-52ec0242804f", + "id": "216b6cc1-a975-4173-bf76-4a4359c6bba5", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 50, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 131072, - "created_at": "2025-08-13T14:33:59.459114+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost compared to traditional large models, making it suitable for scalable deployments across professional and industrial use cases.\n\nThe model excels in domains such as coding, STEM reasoning, and enterprise adaptation. It supports hybrid, on-prem, and in-VPC deployments and is optimized for integration into custom workflows. Mistral Medium 3.1 offers competitive accuracy relative to larger models like Claude Sonnet 3.5/3.7, Llama 4 Maverick, and Command R+, while maintaining broad compatibility across cloud environments.", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "features": { "reasoning_config": { "end_token": null, @@ -91573,111 +91365,81 @@ "system_prompt": null } }, - "group": "Mistral", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Medium 3.1", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-medium-3.1", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Mistral Medium 3.1", - "slug": "mistralai/mistral-medium-3.1", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-medium-3.1", - "model_variant_slug": "mistralai/mistral-medium-3.1", + "model_variant_permaslug": "moonshotai/kimi-k2-0905", + "model_variant_slug": "moonshotai/kimi-k2-0905", "moderation_required": false, - "name": "Mistral | mistralai/mistral-medium-3.1", + "name": "Moonshot AI | moonshotai/kimi-k2-0905", "pricing": { - "completion": "0.000002", + "completion": "0.0000025", "discount": 0, - "prompt": "0.0000004" + "input_cache_read": "0.00000015", + "prompt": "0.0000006" }, - "provider_display_name": "Mistral", + "provider_display_name": "Moonshot AI", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "MoonshotAdapter", + "baseUrl": "https://api.moonshot.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", "training": false }, - "displayName": "Mistral", - "editors": ["{}"], + "displayName": "Moonshot AI", + "editors": [], "hasChatCompletions": true, "hasCompletions": false, - "headquarters": "FR", + "headquarters": "SG", "icon": { - "url": "/images/icons/Mistral.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://moonshot.ai&size=256" }, - "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" - ], + "ignoredProviderModels": [], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Moonshot AI", + "owners": [], + "slug": "moonshotai", + "statusPageUrl": null }, - "provider_model_id": "mistral-medium-2508", - "provider_name": "Mistral", + "provider_model_id": "kimi-k2-0905-preview", + "provider_name": "Moonshot AI", "provider_region": null, - "provider_slug": "mistral", + "provider_slug": "moonshotai", "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "response_format", - "structured_outputs", "tools", "tool_choice" ], @@ -91694,51 +91456,55 @@ "system_prompt": null } }, - "group": "Mistral", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Medium 3.1", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-medium-3.1", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Mistral Medium 3.1", - "slug": "mistralai/mistral-medium-3.1", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 131072, - "created_at": "2024-07-19T00:00:00+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { - "adapter_name": "MistralAdapter", + "adapter_name": "MoonshotAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", "training": false }, "features": { - "supported_parameters": {}, + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -91748,7 +91514,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "2dfff8cc-cb8c-451d-8ca4-1356ad56218d", + "id": "4e55441a-ce9e-4be6-b4d2-3ed42913b7f9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -91757,171 +91523,170 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 131072, - "created_at": "2024-07-19T00:00:00+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", - "features": {}, - "group": "Mistral", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Nemo", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-nemo", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Nemo", - "slug": "mistralai/mistral-nemo", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-nemo", - "model_variant_slug": "mistralai/mistral-nemo", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "Mistral | mistralai/mistral-nemo", + "name": "Moonshot AI | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.00000015", + "completion": "0.0000025", "discount": 0, - "prompt": "0.00000015" + "input_cache_read": "0.00000015", + "prompt": "0.0000006" }, - "provider_display_name": "Mistral", + "provider_display_name": "Moonshot AI", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "MoonshotAdapter", + "baseUrl": "https://api.moonshot.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", "training": false }, - "displayName": "Mistral", - "editors": ["{}"], + "displayName": "Moonshot AI", + "editors": [], "hasChatCompletions": true, "hasCompletions": false, - "headquarters": "FR", + "headquarters": "SG", "icon": { - "url": "/images/icons/Mistral.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://moonshot.ai&size=256" }, - "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" - ], + "ignoredProviderModels": [], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Moonshot AI", + "owners": [], + "slug": "moonshotai", + "statusPageUrl": null }, - "provider_model_id": "open-mistral-nemo-2407", - "provider_name": "Mistral", + "provider_model_id": "kimi-k2-thinking", + "provider_name": "Moonshot AI", "provider_region": null, - "provider_slug": "mistral", - "quantization": "unknown", + "provider_slug": "moonshotai/int4", + "quantization": "int4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "response_format", - "structured_outputs", + "tool_choice", "tools", - "tool_choice" + "structured_outputs", + "response_format" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Nemo", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-nemo", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Nemo", - "slug": "mistralai/mistral-nemo", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 131072, - "created_at": "2025-06-20T18:10:16.960494+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { - "adapter_name": "MistralAdapter", + "adapter_name": "MoonshotAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -91931,192 +91696,200 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "23fd6b91-74d8-45fc-ac00-cf141ec7f4a5", + "id": "49762299-66fd-4514-b0ee-c9e11e231e3b", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 100, + "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 128000, - "created_at": "2025-06-20T18:10:16.960494+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", - "features": {}, - "group": "Mistral", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.2 24B", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2.5-0127", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "Mistral Small 3.2 24B", - "slug": "mistralai/mistral-small-3.2-24b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "model_variant_slug": "mistralai/mistral-small-3.2-24b-instruct", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "Mistral | mistralai/mistral-small-3.2-24b-instruct-2506", + "name": "Moonshot AI | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.0000003", + "completion": "0.000003", "discount": 0, - "prompt": "0.0000001" + "input_cache_read": "0.0000001", + "prompt": "0.0000006" }, - "provider_display_name": "Mistral", + "provider_display_name": "Moonshot AI", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "MoonshotAdapter", + "baseUrl": "https://api.moonshot.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", "training": false }, - "displayName": "Mistral", - "editors": ["{}"], + "displayName": "Moonshot AI", + "editors": [], "hasChatCompletions": true, "hasCompletions": false, - "headquarters": "FR", + "headquarters": "SG", "icon": { - "url": "/images/icons/Mistral.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://moonshot.ai&size=256" }, - "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" - ], + "ignoredProviderModels": [], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Moonshot AI", + "owners": [], + "slug": "moonshotai", + "statusPageUrl": null }, - "provider_model_id": "mistral-small-2506", - "provider_name": "Mistral", + "provider_model_id": "kimi-k2.5", + "provider_name": "Moonshot AI", "provider_region": null, - "provider_slug": "mistral", + "provider_slug": "moonshotai", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", - "temperature", - "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "response_format", "structured_outputs", - "tools", - "tool_choice" + "response_format", + "tool_choice", + "tools" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.2 24B", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2.5-0127", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "Mistral Small 3.2 24B", - "slug": "mistralai/mistral-small-3.2-24b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null - }, + } + ], + "name": "Moonshot AI", + "slug": "moonshotai" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "datacenters": ["US"], + "displayName": "Morph", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://morphllm.com&size=256" + }, + "models": [ { - "author": "mistralai", - "context_length": 32768, - "created_at": "2025-12-16T18:10:53.032181+00:00", + "author": "morph", + "context_length": 81920, + "created_at": "2025-07-07T17:40:02.233313+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.3, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral Small Creative is an experimental small model designed for creative writing, narrative generation, roleplay and character-driven dialogue, general-purpose instruction following, and conversational agents.", + "description": "Morph's fastest apply model for code edits. ~10,500 tokens/sec with 96% accuracy for rapid code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", "endpoint": { - "adapter_name": "MistralAdapter", - "can_abort": false, - "context_length": 32768, + "adapter_name": "MorphAdapter", + "can_abort": true, + "context_length": 81920, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://morphllm.com/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.morphllm.com/privacy/tos", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, + "literal_none": true, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": false, - "id": "0118a65e-79f9-48b0-924e-38c4a41378e1", + "has_completions": true, + "id": "f636f582-98c3-45dd-b468-0b9b0973b1f6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -92125,32 +91898,30 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 38000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32768, - "created_at": "2025-12-16T18:10:53.032181+00:00", + "author": "morph", + "context_length": 81920, + "created_at": "2025-07-07T17:40:02.233313+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.3, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral Small Creative is an experimental small model designed for creative writing, narrative generation, roleplay and character-driven dialogue, general-purpose instruction following, and conversational agents.", + "description": "Morph's fastest apply model for code edits. ~10,500 tokens/sec with 96% accuracy for rapid code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Mistral", + "group": "Other", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, @@ -92158,111 +91929,79 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small Creative", + "name": "Morph: Morph V3 Fast", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-creative-20251216", + "permaslug": "morph/morph-v3-fast", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Mistral Small Creative", - "slug": "mistralai/mistral-small-creative", - "updated_at": "2025-12-16T20:02:56.287656+00:00", + "short_name": "Morph V3 Fast", + "slug": "morph/morph-v3-fast", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-small-creative-20251216", - "model_variant_slug": "mistralai/mistral-small-creative", + "model_variant_permaslug": "morph/morph-v3-fast", + "model_variant_slug": "morph/morph-v3-fast", "moderation_required": false, - "name": "Mistral | mistralai/mistral-small-creative-20251216", + "name": "Morph | morph/morph-v3-fast", "pricing": { - "completion": "0.0000003", + "completion": "0.0000012", "discount": 0, - "prompt": "0.0000001" + "prompt": "0.0000008" }, - "provider_display_name": "Mistral", + "provider_display_name": "Morph", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "MorphAdapter", + "baseUrl": "https://api.morphllm.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://morphllm.com/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.morphllm.com/privacy/tos", "training": false }, - "displayName": "Mistral", - "editors": ["{}"], + "displayName": "Morph", + "editors": [], "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "FR", + "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "/images/icons/Mistral.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://morphllm.com&size=256" }, - "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" - ], - "isAbortable": false, - "isMultipartSupported": true, + "ignoredProviderModels": ["morph-v3-fast"], + "isAbortable": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Morph", + "owners": [], + "slug": "morph", + "statusPageUrl": null }, - "provider_model_id": "labs-mistral-small-creative", - "provider_name": "Mistral", + "provider_model_id": "morph-v3-fast", + "provider_name": "Morph", "provider_region": null, - "provider_slug": "mistral", + "provider_slug": "morph", "quantization": "unknown", - "supported_parameters": ["tools", "tool_choice"], - "supports_multipart": true, + "supported_parameters": ["max_tokens", "temperature", "stop"], + "supports_multipart": false, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Mistral", + "group": "Other", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, @@ -92270,42 +92009,45 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small Creative", + "name": "Morph: Morph V3 Fast", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-creative-20251216", + "permaslug": "morph/morph-v3-fast", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Mistral Small Creative", - "slug": "mistralai/mistral-small-creative", - "updated_at": "2025-12-16T20:02:56.287656+00:00", + "short_name": "Morph V3 Fast", + "slug": "morph/morph-v3-fast", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 65536, - "created_at": "2024-04-17T00:00:00+00:00", + "author": "morph", + "context_length": 262144, + "created_at": "2025-07-07T17:54:18.685519+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe", + "description": "Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", "endpoint": { - "adapter_name": "MistralAdapter", - "can_abort": false, - "context_length": 65536, + "adapter_name": "MorphAdapter", + "can_abort": true, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://morphllm.com/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.morphllm.com/privacy/tos", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -92314,8 +92056,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "e55a0c16-08a0-4871-ae6a-97482cc3231a", + "has_completions": true, + "id": "df8da1e4-a390-4020-a25f-d12d77f65b22", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -92324,168 +92066,165 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 65536, - "created_at": "2024-04-17T00:00:00+00:00", + "author": "morph", + "context_length": 81920, + "created_at": "2025-07-07T17:54:18.685519+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include:\n- strong math, coding, and reasoning\n- large context length (64k)\n- fluency in English, French, Italian, German, and Spanish\n\nSee benchmarks on the launch announcement [here](https://mistral.ai/news/mixtral-8x22b/).\n#moe", - "features": {}, - "group": "Mistral", + "description": "Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mixtral 8x22B Instruct", + "name": "Morph: Morph V3 Large", "output_modalities": ["text"], - "permaslug": "mistralai/mixtral-8x22b-instruct", - "reasoning_config": null, + "permaslug": "morph/morph-v3-large", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mixtral 8x22B Instruct", - "slug": "mistralai/mixtral-8x22b-instruct", + "short_name": "Morph V3 Large", + "slug": "morph/morph-v3-large", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mixtral-8x22b-instruct", - "model_variant_slug": "mistralai/mixtral-8x22b-instruct", + "model_variant_permaslug": "morph/morph-v3-large", + "model_variant_slug": "morph/morph-v3-large", "moderation_required": false, - "name": "Mistral | mistralai/mixtral-8x22b-instruct", + "name": "Morph | morph/morph-v3-large", "pricing": { - "completion": "0.000006", + "completion": "0.0000019", "discount": 0, - "prompt": "0.000002" + "prompt": "0.0000009" }, - "provider_display_name": "Mistral", + "provider_display_name": "Morph", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "MorphAdapter", + "baseUrl": "https://api.morphllm.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://morphllm.com/privacy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.morphllm.com/privacy/tos", "training": false }, - "displayName": "Mistral", - "editors": ["{}"], + "displayName": "Morph", + "editors": [], "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "FR", + "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "/images/icons/Mistral.png" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://morphllm.com&size=256" }, - "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" - ], - "isAbortable": false, - "isMultipartSupported": true, + "ignoredProviderModels": ["morph-v3-fast"], + "isAbortable": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Morph", + "owners": [], + "slug": "morph", + "statusPageUrl": null }, - "provider_model_id": "open-mixtral-8x22b", - "provider_name": "Mistral", + "provider_model_id": "morph-v3-large", + "provider_name": "Morph", "provider_region": null, - "provider_slug": "mistral", + "provider_slug": "morph", "quantization": "unknown", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "response_format", - "structured_outputs", - "tools", - "tool_choice" - ], - "supports_multipart": true, + "supported_parameters": ["max_tokens", "temperature", "stop"], + "supports_multipart": false, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mixtral-8x22B-Instruct-v0.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mixtral 8x22B Instruct", + "name": "Morph: Morph V3 Large", "output_modalities": ["text"], - "permaslug": "mistralai/mixtral-8x22b-instruct", - "reasoning_config": null, + "permaslug": "morph/morph-v3-large", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mixtral 8x22B Instruct", - "slug": "mistralai/mixtral-8x22b-instruct", + "short_name": "Morph V3 Large", + "slug": "morph/morph-v3-large", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, + } + ], + "name": "Morph", + "slug": "morph" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Nebius Token Factory", + "headquarters": "NL", + "icon": { + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + }, + "models": [ { - "author": "mistralai", - "context_length": 131072, - "created_at": "2024-09-10T00:00:00+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "endpoint": { - "adapter_name": "MistralAdapter", + "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { @@ -92498,8 +92237,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "e1d77a4e-3a3d-4dc4-9039-4d311dcb4dba", + "has_completions": true, + "id": "09c158fc-63e9-4c3a-813e-c952828ac296", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -92512,117 +92251,95 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 4096, - "created_at": "2024-09-10T00:00:00+00:00", - "default_parameters": { - "temperature": 0.3 - }, + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "features": {}, - "group": "Mistral", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "mistralai/Pixtral-12B-2409", + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Pixtral 12B", + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "mistralai/pixtral-12b", + "permaslug": "deepseek/deepseek-chat-v3-0324", "reasoning_config": null, "router": null, - "short_name": "Pixtral 12B", - "slug": "mistralai/pixtral-12b", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/pixtral-12b", - "model_variant_slug": "mistralai/pixtral-12b", + "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", + "model_variant_slug": "deepseek/deepseek-chat-v3-0324", "moderation_required": false, - "name": "Mistral | mistralai/pixtral-12b", + "name": "Nebius | deepseek/deepseek-chat-v3-0324", "pricing": { - "completion": "0.00000015", + "completion": "0.0000015", "discount": 0, - "prompt": "0.00000015" + "prompt": "0.0000005" }, - "provider_display_name": "Mistral", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Mistral", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "FR", + "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "/images/icons/Mistral.png" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", + "statusPageUrl": null }, - "provider_model_id": "pixtral-12b-2409", - "provider_name": "Mistral", + "provider_model_id": "deepseek-ai/DeepSeek-V3-0324", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "mistral", - "quantization": "unknown", + "provider_slug": "nebius/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", - "response_format", - "structured_outputs", + "top_k", + "repetition_penalty", "tools", "tool_choice" ], @@ -92633,47 +92350,48 @@ "variant": "standard" }, "features": {}, - "group": "Mistral", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "mistralai/Pixtral-12B-2409", + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Pixtral 12B", + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "mistralai/pixtral-12b", + "permaslug": "deepseek/deepseek-chat-v3-0324", "reasoning_config": null, "router": null, - "short_name": "Pixtral 12B", - "slug": "mistralai/pixtral-12b", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 131072, - "created_at": "2024-11-19T00:49:48.873161+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "endpoint": { - "adapter_name": "MistralAdapter", + "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -92682,8 +92400,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "1a41639e-c1cf-422e-a871-27bc67f03928", + "has_completions": true, + "id": "5d263135-78cb-4d45-a44e-d52c19da5139", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -92696,168 +92414,175 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 128000, - "created_at": "2024-11-19T00:49:48.873161+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", - "features": {}, - "group": "Mistral", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Mistral: Pixtral Large 2411", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "mistralai/pixtral-large-2411", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-r1-0528", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Pixtral Large 2411", - "slug": "mistralai/pixtral-large-2411", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/pixtral-large-2411", - "model_variant_slug": "mistralai/pixtral-large-2411", + "model_variant_permaslug": "deepseek/deepseek-r1-0528", + "model_variant_slug": "deepseek/deepseek-r1-0528", "moderation_required": false, - "name": "Mistral | mistralai/pixtral-large-2411", + "name": "Nebius | deepseek/deepseek-r1-0528", "pricing": { - "completion": "0.000006", + "completion": "0.0000024", "discount": 0, - "prompt": "0.000002" + "prompt": "0.0000008" }, - "provider_display_name": "Mistral", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Mistral", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "FR", + "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "/images/icons/Mistral.png" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", + "statusPageUrl": null }, - "provider_model_id": "pixtral-large-2411", - "provider_name": "Mistral", + "provider_model_id": "deepseek-ai/DeepSeek-R1-0528", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "mistral", - "quantization": "unknown", + "provider_slug": "nebius/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", - "response_format", - "structured_outputs", + "top_k", + "repetition_penalty", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": null, + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Mistral: Pixtral Large 2411", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "mistralai/pixtral-large-2411", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-r1-0528", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Pixtral Large 2411", - "slug": "mistralai/pixtral-large-2411", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2025-02-17T14:40:39.116446+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": [], + "author": "google", + "context_length": 8192, + "created_at": "2024-06-28T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["", "", ""], "default_system": null, - "description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional datasets, it supports multiple Indian-origin languages—including Tamil and Malayalam—alongside Arabic. This makes it a versatile option for a range of regional and multilingual applications. Read more at the blog post [here](https://mistral.ai/en/news/mistral-saba)", + "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", "endpoint": { - "adapter_name": "MistralAdapter", + "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 32768, + "context_length": 8192, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -92866,8 +92591,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "c5a8bebe-8564-47ed-9d05-4151aa3c6e3a", + "has_completions": true, + "id": "a8806d5f-31c2-4f08-9cd7-a39cfec2f648", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -92880,171 +92605,146 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32000, - "created_at": "2025-02-17T14:40:39.116446+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": [], + "author": "google", + "context_length": 8192, + "created_at": "2024-06-28T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["", "", ""], "default_system": null, - "description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional datasets, it supports multiple Indian-origin languages—including Tamil and Malayalam—alongside Arabic. This makes it a versatile option for a range of regional and multilingual applications. Read more at the blog post [here](https://mistral.ai/en/news/mistral-saba)", + "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", "features": {}, - "group": "Mistral", + "group": "Gemini", "has_text_output": true, - "hf_slug": null, + "hf_slug": "google/gemma-2-9b-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "gemma", "model_version_group_id": null, - "name": "Mistral: Saba", + "name": "Google: Gemma 2 9B", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-saba-2502", + "permaslug": "google/gemma-2-9b-it", "reasoning_config": null, "router": null, - "short_name": "Saba", - "slug": "mistralai/mistral-saba", + "short_name": "Gemma 2 9B", + "slug": "google/gemma-2-9b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-saba-2502", - "model_variant_slug": "mistralai/mistral-saba", + "model_variant_permaslug": "google/gemma-2-9b-it", + "model_variant_slug": "google/gemma-2-9b-it", "moderation_required": false, - "name": "Mistral | mistralai/mistral-saba-2502", + "name": "Nebius | google/gemma-2-9b-it", "pricing": { - "completion": "0.0000006", + "completion": "0.00000009", "discount": 0, - "prompt": "0.0000002" + "prompt": "0.00000003" }, - "provider_display_name": "Mistral", + "provider_display_name": "Nebius AI Studio (Fast)", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Mistral", + "displayName": "Nebius AI Studio (Fast)", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "FR", + "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "/images/icons/Mistral.png" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius/fast", + "statusPageUrl": null }, - "provider_model_id": "mistral-saba-2502", - "provider_name": "Mistral", + "provider_model_id": "google/gemma-2-9b-it-fast", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "mistral", - "quantization": "unknown", + "provider_slug": "nebius/fast", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", - "response_format", - "structured_outputs", - "tools", - "tool_choice" + "top_k", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Mistral", + "group": "Gemini", "has_text_output": true, - "hf_slug": null, + "hf_slug": "google/gemma-2-9b-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "gemma", "model_version_group_id": null, - "name": "Mistral: Saba", + "name": "Google: Gemma 2 9B", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-saba-2502", + "permaslug": "google/gemma-2-9b-it", "reasoning_config": null, "router": null, - "short_name": "Saba", - "slug": "mistralai/mistral-saba", + "short_name": "Gemma 2 9B", + "slug": "google/gemma-2-9b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32000, - "created_at": "2025-10-30T14:39:04+00:00", + "author": "google", + "context_length": 110000, + "created_at": "2025-03-12T05:12:39.645813+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.2, - "top_p": 0.95 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["", "", ""], "default_system": null, - "description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. Input audio is priced at $100 per million seconds.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "endpoint": { - "adapter_name": "MistralAdapter", + "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 32000, + "context_length": 110000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { "supported_parameters": {}, - "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -93054,7 +92754,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "814f9be0-e226-4b8c-bb5d-bf684359da13", + "id": "db3af039-8c6e-4b32-bbf5-c858d644d1b5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -93067,17 +92767,17 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32000, - "created_at": "2025-10-30T14:39:04+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-12T05:12:39.645813+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.2, - "top_p": 0.95 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["", "", ""], "default_system": null, - "description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. Input audio is priced at $100 per million seconds.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "features": { "chat_template_config": {}, "reasoning_config": { @@ -93086,117 +92786,87 @@ "system_prompt": null } }, - "group": "Mistral", + "group": "Gemini", "has_text_output": true, - "hf_slug": "mistralai/Voxtral-Small-24B-2507", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "audio"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Voxtral Small 24B 2507", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "mistralai/voxtral-small-24b-2507", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Voxtral Small 24B 2507", - "slug": "mistralai/voxtral-small-24b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/voxtral-small-24b-2507", - "model_variant_slug": "mistralai/voxtral-small-24b-2507", + "model_variant_permaslug": "google/gemma-3-27b-it", + "model_variant_slug": "google/gemma-3-27b-it", "moderation_required": false, - "name": "Mistral | mistralai/voxtral-small-24b-2507", + "name": "Nebius | google/gemma-3-27b-it", "pricing": { "completion": "0.0000003", "discount": 0, "prompt": "0.0000001" }, - "provider_display_name": "Mistral", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "MistralAdapter", - "baseUrl": "https://api.mistral.ai/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://mistral.ai/terms/#privacy-policy", - "retainsPrompts": true, - "termsOfServiceURL": "https://mistral.ai/terms/#terms-of-use", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Mistral", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "FR", + "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "/images/icons/Mistral.png" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, "ignoredProviderModels": [ - "mistral-moderation-2411-all", - "voxtral-mini-2507", - "voxtral-small-2507", - "voxtral-mini-transcribe-2507", - "mistral-medium", - "mistral-tiny", - "mistral-tiny-2312", - "open-mistral-nemo", - "mistral-tiny-2407", - "open-mixtral-8x7b", - "mistral-small", - "mistral-small-2312", - "open-mixtral-8x22b-2404", - "mistral-large-pixtral-2411", - "codestral-2412", - "codestral-2411-rc5", - "pixtral-12b", - "mistral-moderation-2411", - "mistral-ocr-2503", - "mistral-ocr-2505", - "mistral-saba-2502", - "open-mixtral-8x22b", - "mistral-large-2407", - "magistral-medium-2507", - "mistral-embed", - "codestral-embed", - "codestral-2501", - "mistral-small-2501", - "mistral-ocr-2512" + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Mistral", - "owners": ["{}"], - "slug": "mistral", - "statusPageUrl": "https://status.mistral.ai/" + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", + "statusPageUrl": null }, - "provider_model_id": "voxtral-small-2507", - "provider_name": "Mistral", + "provider_model_id": "google/gemma-3-27b-it", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "mistral", - "quantization": "unknown", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "response_format", - "structured_outputs", - "tools", - "tool_choice" - ], + "provider_slug": "nebius/fp8", + "quantization": "fp8", + "supported_parameters": ["max_tokens"], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -93208,84 +92878,49 @@ "system_prompt": null } }, - "group": "Mistral", + "group": "Gemini", "has_text_output": true, - "hf_slug": "mistralai/Voxtral-Small-24B-2507", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "audio"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Mistral: Voxtral Small 24B 2507", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "mistralai/voxtral-small-24b-2507", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Voxtral Small 24B 2507", - "slug": "mistralai/voxtral-small-24b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null - } - ], - "name": "Mistral", - "slug": "mistral" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "datacenters": ["US"], - "displayName": "ModelRun", - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://api.runmodelrun.com&size=256" - }, - "models": [], - "name": "ModelRun", - "slug": "modelrun" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "datacenters": ["SG"], - "displayName": "Moonshot AI", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://moonshot.ai&size=256" - }, - "models": [ + }, { - "author": "moonshotai", + "author": "meta-llama", "context_length": 131072, - "created_at": "2025-07-11T19:47:32.565514+00:00", + "created_at": "2025-02-12T23:01:58.468577+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n", "endpoint": { - "adapter_name": "MoonshotAdapter", + "adapter_name": "NebiusAdapter", "can_abort": false, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -93294,117 +92929,265 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "92d2b1ae-c007-4919-840e-16b2185bcf91", + "has_completions": true, + "id": "b2fbbc15-67f2-4b72-94a0-112f1591f295", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 50, + "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 131072, - "created_at": "2025-07-11T19:47:32.565514+00:00", + "author": "meta-llama", + "context_length": 0, + "created_at": "2025-02-12T23:01:58.468577+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct", + "hf_slug": "meta-llama/Llama-Guard-3-8B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "none", "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0711", + "name": "Llama Guard 3 8B", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "permaslug": "meta-llama/llama-guard-3-8b", + "reasoning_config": null, + "router": null, + "short_name": "Llama Guard 3 8B", + "slug": "meta-llama/llama-guard-3-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "meta-llama/llama-guard-3-8b", + "model_variant_slug": "meta-llama/llama-guard-3-8b", + "moderation_required": false, + "name": "Nebius | meta-llama/llama-guard-3-8b", + "pricing": { + "completion": "0.00000006", + "discount": 0, + "prompt": "0.00000002" + }, + "provider_display_name": "Nebius Token Factory", + "provider_info": { + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "training": false + }, + "displayName": "Nebius Token Factory", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "NL", + "icon": { + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + }, + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], + "isAbortable": false, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", + "statusPageUrl": null + }, + "provider_model_id": "meta-llama/Llama-Guard-3-8B", + "provider_name": "Nebius", + "provider_region": null, + "provider_slug": "nebius", + "quantization": "unknown", + "supported_parameters": ["max_tokens"], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": false, + "variable_pricings": [], + "variant": "standard" + }, + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-Guard-3-8B", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "none", + "model_version_group_id": null, + "name": "Llama Guard 3 8B", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-guard-3-8b", + "reasoning_config": null, + "router": null, + "short_name": "Llama Guard 3 8B", + "slug": "meta-llama/llama-guard-3-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + { + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_system": null, + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "endpoint": { + "adapter_name": "NebiusAdapter", + "can_abort": false, + "context_length": 131072, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "training": false + }, + "features": { + "supported_parameters": { + "structured_outputs": false }, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "474a9b4c-3ad1-403a-b84a-763335ae8f61", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_system": null, + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.1-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 0711", - "slug": "moonshotai/kimi-k2", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2", - "model_variant_slug": "moonshotai/kimi-k2", + "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", "moderation_required": false, - "name": "Moonshot AI | moonshotai/kimi-k2", + "name": "Nebius | meta-llama/llama-3.1-8b-instruct", "pricing": { - "completion": "0.0000025", + "completion": "0.00000006", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000015", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000002" }, - "provider_display_name": "Moonshot AI", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "MoonshotAdapter", - "baseUrl": "https://api.moonshot.ai/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Moonshot AI", - "editors": [], + "displayName": "Nebius Token Factory", + "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://moonshot.ai&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Moonshot AI", - "owners": [], - "slug": "moonshotai", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "kimi-k2-0711-preview", - "provider_name": "Moonshot AI", + "provider_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "moonshotai/fp8", + "provider_slug": "nebius/fp8", "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", + "top_k", + "repetition_penalty", "tools", "tool_choice", - "logprobs", - "top_logprobs" + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, @@ -93412,61 +93195,45 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct", + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0711", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.1-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 0711", - "slug": "moonshotai/kimi-k2", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { - "adapter_name": "MoonshotAdapter", + "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_implicit_caching": true, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -93475,113 +93242,109 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "216b6cc1-a975-4173-bf76-4a4359c6bba5", + "has_completions": true, + "id": "5f78cde2-65d7-4e4a-86a2-79f702973039", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 50, + "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "Moonshot AI | moonshotai/kimi-k2-0905", + "name": "Nebius | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.0000025", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000015", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000013" }, - "provider_display_name": "Moonshot AI", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "MoonshotAdapter", - "baseUrl": "https://api.moonshot.ai/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Moonshot AI", - "editors": [], + "displayName": "Nebius Token Factory", + "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://moonshot.ai&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Moonshot AI", - "owners": [], - "slug": "moonshotai", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "kimi-k2-0905-preview", - "provider_name": "Moonshot AI", + "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "moonshotai", - "quantization": "unknown", + "provider_slug": "nebius/fp8", + "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", + "top_k", + "repetition_penalty", "tools", "tool_choice" ], @@ -93591,72 +93354,60 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "author": "minimax", + "context_length": 196608, + "created_at": "2025-12-23T01:56:37+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.9 }, "default_stops": [], - "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { - "adapter_name": "MoonshotAdapter", + "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 196608, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "is_mandatory_reasoning": true, "reasoning_return_mechanism": "reasoning-content", - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": false, - "id": "4e55441a-ce9e-4be6-b4d2-3ed42913b7f9", + "has_completions": true, + "id": "8ea464a2-d7a7-4467-b255-5bcc0f203d06", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -93665,23 +93416,25 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.9 }, "default_stops": [], - "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { "end_token": "", "start_token": "", @@ -93690,74 +93443,81 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "minimax/minimax-m2.1", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "minimax/minimax-m2.1", + "model_variant_slug": "minimax/minimax-m2.1", "moderation_required": false, - "name": "Moonshot AI | moonshotai/kimi-k2-thinking-20251106", + "name": "Nebius | minimax/minimax-m2.1", "pricing": { - "completion": "0.0000025", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000015", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, - "provider_display_name": "Moonshot AI", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "MoonshotAdapter", - "baseUrl": "https://api.moonshot.ai/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://platform.moonshot.ai/docs/agreement/userprivacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://platform.moonshot.ai/docs/agreement/modeluse", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Moonshot AI", - "editors": [], + "displayName": "Nebius Token Factory", + "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, + "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://moonshot.ai&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "Moonshot AI", - "owners": [], - "slug": "moonshotai", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "kimi-k2-thinking", - "provider_name": "Moonshot AI", + "provider_model_id": "MiniMaxAI/MiniMax-M2.1", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "moonshotai/int4", - "quantization": "int4", + "provider_slug": "nebius/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", @@ -93767,10 +93527,16 @@ "stop", "frequency_penalty", "presence_penalty", + "seed", + "top_k", + "logit_bias", + "logprobs", + "top_logprobs", "tool_choice", "tools", "structured_outputs", - "response_format" + "response_format", + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": true, @@ -93779,7 +93545,9 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { "end_token": "", "start_token": "", @@ -93788,68 +93556,46 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "minimax/minimax-m2.1", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null - } - ], - "name": "Moonshot AI", - "slug": "moonshotai" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "datacenters": ["US"], - "displayName": "Morph", - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://morphllm.com&size=256" - }, - "models": [ + }, { - "author": "morph", - "context_length": 81920, - "created_at": "2025-07-07T17:40:02.233313+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "moonshotai", + "context_length": 131072, + "created_at": "2025-07-11T19:47:32.565514+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Morph's fastest apply model for code edits. ~10,500 tokens/sec with 96% accuracy for rapid code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { - "adapter_name": "MorphAdapter", - "can_abort": true, - "context_length": 81920, + "adapter_name": "NebiusAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://morphllm.com/privacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://www.morphllm.com/privacy/tos", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -93859,7 +93605,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f636f582-98c3-45dd-b468-0b9b0973b1f6", + "id": "900353a5-eceb-48bc-91e5-de424b9cecc6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -93868,23 +93614,18 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 38000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "morph", - "context_length": 81920, - "created_at": "2025-07-07T17:40:02.233313+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "moonshotai", + "context_length": 131072, + "created_at": "2025-07-11T19:47:32.565514+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Morph's fastest apply model for code edits. ~10,500 tokens/sec with 96% accuracy for rapid code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -93893,83 +93634,99 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Morph: Morph V3 Fast", + "name": "MoonshotAI: Kimi K2 0711", "output_modalities": ["text"], - "permaslug": "morph/morph-v3-fast", + "permaslug": "moonshotai/kimi-k2", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Morph V3 Fast", - "slug": "morph/morph-v3-fast", + "short_name": "Kimi K2 0711", + "slug": "moonshotai/kimi-k2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "morph/morph-v3-fast", - "model_variant_slug": "morph/morph-v3-fast", + "model_variant_permaslug": "moonshotai/kimi-k2", + "model_variant_slug": "moonshotai/kimi-k2", "moderation_required": false, - "name": "Morph | morph/morph-v3-fast", + "name": "Nebius | moonshotai/kimi-k2", "pricing": { - "completion": "0.0000012", + "completion": "0.0000024", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, - "provider_display_name": "Morph", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "MorphAdapter", - "baseUrl": "https://api.morphllm.com/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://morphllm.com/privacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://www.morphllm.com/privacy/tos", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Morph", - "editors": [], + "displayName": "Nebius Token Factory", + "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://morphllm.com&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": ["morph-v3-fast"], - "isAbortable": true, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], + "isAbortable": false, + "isMultipartSupported": true, "moderationRequired": false, - "name": "Morph", - "owners": [], - "slug": "morph", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "morph-v3-fast", - "provider_name": "Morph", + "provider_model_id": "moonshotai/Kimi-K2-Instruct", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "morph", - "quantization": "unknown", - "supported_parameters": ["max_tokens", "temperature", "stop"], - "supports_multipart": false, + "provider_slug": "nebius/fp4", + "quantization": "fp4", + "supported_parameters": [ + "max_tokens", + "temperature", + "top_p", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", + "tools", + "tool_choice" + ], + "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -93978,30 +93735,30 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Morph: Morph V3 Fast", + "name": "MoonshotAI: Kimi K2 0711", "output_modalities": ["text"], - "permaslug": "morph/morph-v3-fast", + "permaslug": "moonshotai/kimi-k2", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Morph V3 Fast", - "slug": "morph/morph-v3-fast", + "short_name": "Kimi K2 0711", + "slug": "moonshotai/kimi-k2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "morph", + "author": "moonshotai", "context_length": 262144, - "created_at": "2025-07-07T17:54:18.685519+00:00", + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -94009,20 +93766,21 @@ }, "default_stops": [], "default_system": null, - "description": "Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { - "adapter_name": "MorphAdapter", - "can_abort": true, + "adapter_name": "NebiusAdapter", + "can_abort": false, "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://morphllm.com/privacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://www.morphllm.com/privacy/tos", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": {}, + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -94032,7 +93790,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "df8da1e4-a390-4020-a25f-d12d77f65b22", + "id": "5dfd92d5-ca14-4ea6-8667-76e5dd2187d4", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -94041,13 +93799,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "morph", - "context_length": 81920, - "created_at": "2025-07-07T17:54:18.685519+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -94055,162 +93813,167 @@ }, "default_stops": [], "default_system": null, - "description": "Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations.\n\nThe model requires the prompt to be in the following format: \n{instruction}\n{initial_code}\n{edit_snippet}\n\nZero Data Retention is enabled for Morph. Learn more about this model in their [documentation](https://docs.morphllm.com/quickstart)", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Morph: Morph V3 Large", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "morph/morph-v3-large", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Morph V3 Large", - "slug": "morph/morph-v3-large", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "morph/morph-v3-large", - "model_variant_slug": "morph/morph-v3-large", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "Morph | morph/morph-v3-large", + "name": "Nebius | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.0000019", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000009", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, - "provider_display_name": "Morph", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "MorphAdapter", - "baseUrl": "https://api.morphllm.com/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://morphllm.com/privacy", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", "retainsPrompts": false, - "termsOfServiceURL": "https://www.morphllm.com/privacy/tos", + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Morph", - "editors": [], + "displayName": "Nebius Token Factory", + "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://morphllm.com&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": ["morph-v3-fast"], - "isAbortable": true, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], + "isAbortable": false, + "isMultipartSupported": true, "moderationRequired": false, - "name": "Morph", - "owners": [], - "slug": "morph", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "morph-v3-large", - "provider_name": "Morph", + "provider_model_id": "moonshotai/Kimi-K2-Thinking", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "morph", - "quantization": "unknown", - "supported_parameters": ["max_tokens", "temperature", "stop"], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "provider_slug": "nebius/fp8", + "quantization": "fp8", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", + "tools", + "tool_choice", + "response_format", + "structured_outputs" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Morph: Morph V3 Large", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "morph/morph-v3-large", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Morph V3 Large", - "slug": "morph/morph-v3-large", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null - } - ], - "name": "Morph", - "slug": "morph" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "displayName": "nCompass", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://console.ncompass.tech/&size=256" - }, - "models": [ + }, { - "author": "google", - "context_length": 131000, - "created_at": "2025-03-12T05:12:39.645813+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["", "", ""], + "author": "nousresearch", + "context_length": 131072, + "created_at": "2025-08-26T19:11:03.380181+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with ... traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": true, - "context_length": 131000, + "adapter_name": "NebiusAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "structured_outputs": false + }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -94220,7 +93983,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0d39fcd5-2052-49a9-9938-59bd11182b61", + "id": "c575da03-bbd8-4d9e-963d-497ea0ffcb13", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -94229,170 +93992,172 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-12T05:12:39.645813+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["", "", ""], + "author": "nousresearch", + "context_length": 0, + "created_at": "2025-08-26T19:11:03.380181+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with ... traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "", + "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." } }, - "group": "Gemini", + "group": "Other", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "NousResearch/Hermes-4-405B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Nous: Hermes 4 405B", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "permaslug": "nousresearch/hermes-4-405b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "", + "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Hermes 4 405B", + "slug": "nousresearch/hermes-4-405b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3-27b-it", - "model_variant_slug": "google/gemma-3-27b-it", + "model_variant_permaslug": "nousresearch/hermes-4-405b", + "model_variant_slug": "nousresearch/hermes-4-405b", "moderation_required": false, - "name": "NCompass | google/gemma-3-27b-it", + "name": "Nebius | nousresearch/hermes-4-405b", "pricing": { - "completion": "0.00000016", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.000001" }, - "provider_display_name": "nCompass", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.ncompass.tech/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "nCompass", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://console.ncompass.tech/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": true, + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "NCompass", - "owners": ["{}"], - "slug": "ncompass", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "google/gemma-3-27b-it", - "provider_name": "NCompass", + "provider_model_id": "NousResearch/Hermes-4-405B", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "ncompass/fp8", + "provider_slug": "nebius/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", "top_k", - "min_p", - "repetition_penalty" + "repetition_penalty", + "response_format" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "", + "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." } }, - "group": "Gemini", + "group": "Other", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "NousResearch/Hermes-4-405B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Nous: Hermes 4 405B", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "permaslug": "nousresearch/hermes-4-405b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "", + "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Hermes 4 405B", + "slug": "nousresearch/hermes-4-405b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131000, - "created_at": "2025-08-05T17:17:11+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "nousresearch", + "context_length": 131072, + "created_at": "2025-08-26T19:23:02.446988+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit ... reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": true, - "context_length": 131000, + "adapter_name": "NebiusAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { + "supported_parameters": { + "structured_outputs": false + }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -94402,7 +94167,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "2b0066f4-02f7-4c8a-94ad-5b09351e0e2b", + "id": "9e1bd3ed-059c-4fcf-9435-c1993ef564db", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -94411,174 +94176,169 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", + "author": "nousresearch", "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "created_at": "2025-08-26T19:23:02.446988+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit ... reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": null + "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." } }, - "group": "GPT", + "group": "Llama3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "NousResearch/Hermes-4-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Nous: Hermes 4 70B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "nousresearch/hermes-4-70b", "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": null + "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Hermes 4 70B", + "slug": "nousresearch/hermes-4-70b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "nousresearch/hermes-4-70b", + "model_variant_slug": "nousresearch/hermes-4-70b", "moderation_required": false, - "name": "NCompass | openai/gpt-oss-120b", + "name": "Nebius | nousresearch/hermes-4-70b", "pricing": { - "completion": "0.00000028", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000013" }, - "provider_display_name": "nCompass", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.ncompass.tech/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "nCompass", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://console.ncompass.tech/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": true, + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "NCompass", - "owners": ["{}"], - "slug": "ncompass", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-120b", - "provider_name": "NCompass", + "provider_model_id": "NousResearch/Hermes-4-70B", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "ncompass", - "quantization": "unknown", + "provider_slug": "nebius/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", "top_k", - "min_p", "repetition_penalty", - "tools", - "tool_choice" + "response_format" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": null + "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." } }, - "group": "GPT", + "group": "Llama3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "NousResearch/Hermes-4-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Nous: Hermes 4 70B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "nousresearch/hermes-4-70b", "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": null + "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Hermes 4 70B", + "slug": "nousresearch/hermes-4-70b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131000, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "nvidia", + "context_length": 131072, + "created_at": "2025-04-08T12:24:19.697786+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": true, - "context_length": 131000, + "adapter_name": "NebiusAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -94588,7 +94348,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "093d4af2-499e-4f6e-b4fa-68b969992682", + "id": "f4c50341-ced1-4d4f-afe6-8082c3f052bb", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -94597,113 +94357,111 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", + "author": "nvidia", "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "created_at": "2025-04-08T12:24:19.697786+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, - "group": "GPT", + "group": "Llama3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3.1 Nemotron Ultra 253B v1", + "slug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "model_variant_slug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", "moderation_required": false, - "name": "NCompass | openai/gpt-oss-20b", + "name": "Nebius | nvidia/llama-3.1-nemotron-ultra-253b-v1", "pricing": { - "completion": "0.00000015", + "completion": "0.0000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, - "provider_display_name": "nCompass", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.ncompass.tech/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "nCompass", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://console.ncompass.tech/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": true, + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "NCompass", - "owners": ["{}"], - "slug": "ncompass", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-20b", - "provider_name": "NCompass", + "provider_model_id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "ncompass", - "quantization": "unknown", + "provider_slug": "nebius/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", "top_k", - "min_p", - "repetition_penalty" + "repetition_penalty", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -94712,65 +94470,56 @@ "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, - "group": "GPT", + "group": "Llama3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3.1 Nemotron Ultra 253B v1", + "slug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", + "author": "openai", "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": true, + "adapter_name": "NebiusAdapter", + "can_abort": false, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -94780,7 +94529,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9c491e18-fd96-41d6-92c0-2311f0f5268e", + "id": "fec8a05d-b2aa-4bbd-adbf-fa688dfa984e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -94789,21 +94538,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "openai", "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -94812,92 +94561,97 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", - "model_variant_slug": "qwen/qwen3-30b-a3b", + "model_variant_permaslug": "openai/gpt-oss-120b", + "model_variant_slug": "openai/gpt-oss-120b", "moderation_required": false, - "name": "NCompass | qwen/qwen3-30b-a3b-04-28", + "name": "Nebius | openai/gpt-oss-120b", "pricing": { - "completion": "0.00000028", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, - "provider_display_name": "nCompass", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.ncompass.tech/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "nCompass", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://console.ncompass.tech/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": true, + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "NCompass", - "owners": ["{}"], - "slug": "ncompass", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-30b-a3b", - "provider_name": "NCompass", + "provider_model_id": "openai/gpt-oss-120b", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "ncompass/fp8", - "quantization": "fp8", + "provider_slug": "nebius/fp4", + "quantization": "fp4", "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", "top_k", - "min_p", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -94913,50 +94667,52 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": true, - "context_length": 40960, + "adapter_name": "NebiusAdapter", + "can_abort": false, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -94966,7 +94722,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "ffc8c1a7-fefd-4f39-93cc-f54cf7955cb7", + "id": "94e1e794-6992-4a4a-975f-420382a5e5e9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -94975,106 +94731,122 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 40960, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "openai", "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-32b-04-28", - "model_variant_slug": "qwen/qwen3-32b", + "model_variant_permaslug": "openai/gpt-oss-20b", + "model_variant_slug": "openai/gpt-oss-20b", "moderation_required": false, - "name": "NCompass | qwen/qwen3-32b-04-28", + "name": "Nebius | openai/gpt-oss-20b", "pricing": { - "completion": "0.00000028", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000005" }, - "provider_display_name": "nCompass", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.ncompass.tech/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "nCompass", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://console.ncompass.tech/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": true, + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "NCompass", - "owners": ["{}"], - "slug": "ncompass", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-32b", - "provider_name": "NCompass", + "provider_model_id": "openai/gpt-oss-20b", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "ncompass/fp8", - "quantization": "fp8", + "provider_slug": "nebius/fp4", + "quantization": "fp4", "supported_parameters": [ "reasoning", "include_reasoning", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", "top_k", - "min_p", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -95083,60 +94855,63 @@ "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "alibaba", + "author": "prime-intellect", "context_length": 131072, - "created_at": "2025-09-18T15:53:24.33761+00:00", + "created_at": "2025-11-27T03:02:14.49479+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, + "temperature": 0.6, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.", + "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": true, + "adapter_name": "NebiusAdapter", + "can_abort": false, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -95146,7 +94921,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "7d7e8fdf-1dcf-40b9-8521-3d50baa3fbff", + "id": "54e0f2d4-d791-4a30-ab77-74efbb8b06f5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -95159,17 +94934,17 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "alibaba", + "author": "prime-intellect", "context_length": 131072, - "created_at": "2025-09-18T15:53:24.33761+00:00", + "created_at": "2025-11-27T03:02:14.49479+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, + "temperature": 0.6, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks and delivers state-of-the-art performance on benchmarks like Humanity's Last Exam, BrowserComp, BrowserComp-ZH, WebWalkerQA, GAIA, xbench-DeepSearch, and FRAMES. This makes it superior for complex agentic search, reasoning, and multi-step problem-solving compared to prior models.\n\nThe model includes a fully automated synthetic data pipeline for scalable pre-training, fine-tuning, and reinforcement learning. It uses large-scale continual pre-training on diverse agentic data to boost reasoning and stay fresh. It also features end-to-end on-policy RL with a customized Group Relative Policy Optimization, including token-level gradients and negative sample filtering for stable training. The model supports ReAct for core ability checks and an IterResearch-based 'Heavy' mode for max performance through test-time scaling. It's ideal for advanced research agents, tool use, and heavy inference workflows.", + "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -95180,89 +94955,95 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", + "hf_slug": "PrimeIntellect/INTELLECT-3-FP8", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Tongyi DeepResearch 30B A3B", + "name": "Prime Intellect: INTELLECT-3", "output_modalities": ["text"], - "permaslug": "alibaba/tongyi-deepresearch-30b-a3b", + "permaslug": "prime-intellect/intellect-3-20251126", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Tongyi DeepResearch 30B A3B", - "slug": "alibaba/tongyi-deepresearch-30b-a3b", - "updated_at": "2026-01-10T14:09:25.267618+00:00", + "short_name": "INTELLECT-3", + "slug": "prime-intellect/intellect-3", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "alibaba/tongyi-deepresearch-30b-a3b", - "model_variant_slug": "alibaba/tongyi-deepresearch-30b-a3b", + "model_variant_permaslug": "prime-intellect/intellect-3-20251126", + "model_variant_slug": "prime-intellect/intellect-3", "moderation_required": false, - "name": "NCompass | alibaba/tongyi-deepresearch-30b-a3b", + "name": "Nebius | prime-intellect/intellect-3-20251126", "pricing": { - "completion": "0.0000004", + "completion": "0.0000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, - "provider_display_name": "nCompass", + "provider_display_name": "Nebius Token Factory", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.ncompass.tech/v1", + "adapterName": "NebiusAdapter", + "baseUrl": "https://api.studio.nebius.ai/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://ncompass.tech/privacy", - "retainsPrompts": true, - "termsOfServiceURL": "https://ncompass.tech/terms", + "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "retainsPrompts": false, + "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "nCompass", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "NL", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://console.ncompass.tech/&size=256" + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": true, + "ignoredProviderModels": [ + "Qwen/Qwen3-Embedding-8B", + "BAAI/bge-en-icl", + "BAAI/bge-multilingual-gemma2", + "intfloat/e5-mistral-7b-instruct", + "Qwen/Qwen3-235B-A22B-Thinking-2507", + "black-forest-labs/flux-dev", + "black-forest-labs/flux-schnell", + "google/gemma-2-2b-it", + "NousResearch/Hermes-4-70B", + "NousResearch/Hermes-4-405B" + ], + "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "NCompass", - "owners": ["{}"], - "slug": "ncompass", + "name": "Nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "alibaba/tongyi-deepresearch-30b-a3b", - "provider_name": "NCompass", + "provider_model_id": "PrimeIntellect/INTELLECT-3", + "provider_name": "Nebius", "provider_region": null, - "provider_slug": "ncompass/bf16", - "quantization": "bf16", + "provider_slug": "nebius/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", - "response_format", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "seed", "top_k", - "min_p", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -95280,55 +95061,38 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B", + "hf_slug": "PrimeIntellect/INTELLECT-3-FP8", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Tongyi DeepResearch 30B A3B", + "name": "Prime Intellect: INTELLECT-3", "output_modalities": ["text"], - "permaslug": "alibaba/tongyi-deepresearch-30b-a3b", + "permaslug": "prime-intellect/intellect-3-20251126", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Tongyi DeepResearch 30B A3B", - "slug": "alibaba/tongyi-deepresearch-30b-a3b", - "updated_at": "2026-01-10T14:09:25.267618+00:00", + "short_name": "INTELLECT-3", + "slug": "prime-intellect/intellect-3", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null - } - ], - "name": "NCompass", - "slug": "ncompass" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Nebius Token Factory", - "headquarters": "NL", - "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" - }, - "models": [ + }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "author": "qwen", + "context_length": 32768, + "created_at": "2025-04-15T16:34:47.067646+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing. Based on the Qwen2.5 architecture, it incorporates enhancements like RoPE, SwiGLU, RMSNorm, and GQA attention with support for up to 128K tokens using YaRN-based extrapolation. It is trained on a large corpus of source code, synthetic data, and text-code grounding, providing robust performance across programming languages and agentic coding workflows.\n\nThis model is part of the Qwen2.5-Coder family and offers strong compatibility with tools like vLLM for efficient deployment. Released under the Apache 2.0 license.", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 163840, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -95337,7 +95101,10 @@ "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -95347,7 +95114,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "09c158fc-63e9-4c3a-813e-c952828ac296", + "id": "5c560917-3588-4e4f-8684-79961d0b30f9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -95360,47 +95127,42 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", + "author": "qwen", "context_length": 131072, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "created_at": "2025-04-15T16:34:47.067646+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing. Based on the Qwen2.5 architecture, it incorporates enhancements like RoPE, SwiGLU, RMSNorm, and GQA attention with support for up to 128K tokens using YaRN-based extrapolation. It is trained on a large corpus of source code, synthetic data, and text-code grounding, providing robust performance across programming languages and agentic coding workflows.\n\nThis model is part of the Qwen2.5-Coder family and offers strong compatibility with tools like vLLM for efficient deployment. Released under the Apache 2.0 license.", "features": {}, - "group": "DeepSeek", + "group": "Qwen", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "Qwen/Qwen2.5-Coder-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "model_version_group_id": null, + "name": "Qwen: Qwen2.5 Coder 7B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", + "permaslug": "qwen/qwen2.5-coder-7b-instruct", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Qwen2.5 Coder 7B Instruct", + "slug": "qwen/qwen2.5-coder-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", - "model_variant_slug": "deepseek/deepseek-chat-v3-0324", + "model_variant_permaslug": "qwen/qwen2.5-coder-7b-instruct", + "model_variant_slug": "qwen/qwen2.5-coder-7b-instruct", "moderation_required": false, - "name": "Nebius | deepseek/deepseek-chat-v3-0324", + "name": "Nebius | qwen/qwen2.5-coder-7b-instruct", "pricing": { - "completion": "0.0000015", + "completion": "0.00000009", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000003" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "Nebius AI Studio (Fast)", "provider_info": { "adapterName": "NebiusAdapter", "baseUrl": "https://api.studio.nebius.ai/v1", @@ -95412,7 +95174,7 @@ "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "Nebius AI Studio (Fast)", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, @@ -95437,67 +95199,63 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], - "slug": "nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius/fast", "statusPageUrl": null }, - "provider_model_id": "deepseek-ai/DeepSeek-V3-0324", + "provider_model_id": "Qwen/Qwen2.5-Coder-7B-fast", "provider_name": "Nebius", "provider_region": null, - "provider_slug": "nebius/fp8", + "provider_slug": "nebius/fast", "quantization": "fp8", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "frequency_penalty", "presence_penalty", "top_k", - "repetition_penalty", - "tools", - "tool_choice" + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "DeepSeek", + "group": "Qwen", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "Qwen/Qwen2.5-Coder-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "model_version_group_id": null, + "name": "Qwen: Qwen2.5 Coder 7B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", + "permaslug": "qwen/qwen2.5-coder-7b-instruct", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Qwen2.5 Coder 7B Instruct", + "slug": "qwen/qwen2.5-coder-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "qwen", + "context_length": 32000, + "created_at": "2025-02-01T11:45:11.997326+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 163840, + "context_length": 32000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -95506,6 +95264,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -95515,7 +95274,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5d263135-78cb-4d45-a44e-d52c19da5139", + "id": "2fff2f07-5438-4dcd-854c-6a8ca11fd420", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -95528,60 +95287,40 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "qwen", + "context_length": 131072, + "created_at": "2025-02-01T11:45:11.997326+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-r1", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "name": "Qwen: Qwen2.5 VL 72B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Qwen2.5 VL 72B Instruct", + "slug": "qwen/qwen2.5-vl-72b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-0528", - "model_variant_slug": "deepseek/deepseek-r1-0528", + "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", "moderation_required": false, - "name": "Nebius | deepseek/deepseek-r1-0528", + "name": "Nebius | qwen/qwen2.5-vl-72b-instruct", "pricing": { - "completion": "0.0000024", + "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -95620,76 +95359,53 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "deepseek-ai/DeepSeek-R1-0528", + "provider_model_id": "Qwen/Qwen2.5-VL-72B-Instruct", "provider_name": "Nebius", "provider_region": null, "provider_slug": "nebius/fp8", "quantization": "fp8", - "supported_parameters": [ - "reasoning", - "include_reasoning", - "max_tokens", - "temperature", - "top_p", - "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice" - ], + "supported_parameters": ["max_tokens"], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-r1", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "name": "Qwen: Qwen2.5 VL 72B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Qwen2.5 VL 72B Instruct", + "slug": "qwen/qwen2.5-vl-72b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 8192, - "created_at": "2024-06-28T00:00:00+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, - "default_stops": ["", "", ""], + "default_stops": [], "default_system": null, - "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 8192, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -95698,10 +95414,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -95711,7 +95423,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "a8806d5f-31c2-4f08-9cd7-a39cfec2f648", + "id": "d115b865-d5c4-416d-b622-72afdef64698", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -95724,47 +95436,52 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 8192, - "created_at": "2024-06-28T00:00:00+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, - "default_stops": ["", "", ""], + "default_stops": [], "default_system": null, - "description": "Gemma 2 9B by Google is an advanced, open-source language model that sets a new standard for efficiency and performance in its size class.\n\nDesigned for a wide variety of tasks, it empowers developers and researchers to build innovative applications, while maintaining accessibility, safety, and cost-effectiveness.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", - "features": {}, - "group": "Gemini", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "google/gemma-2-9b-it", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "gemma", + "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemma 2 9B", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "google/gemma-2-9b-it", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Gemma 2 9B", - "slug": "google/gemma-2-9b-it", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-2-9b-it", - "model_variant_slug": "google/gemma-2-9b-it", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "Nebius | google/gemma-2-9b-it", + "name": "Nebius | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.00000009", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, - "provider_display_name": "Nebius AI Studio (Fast)", + "provider_display_name": "Nebius Token Factory", "provider_info": { "adapterName": "NebiusAdapter", "baseUrl": "https://api.studio.nebius.ai/v1", @@ -95776,7 +95493,7 @@ "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", "training": false }, - "displayName": "Nebius AI Studio (Fast)", + "displayName": "Nebius Token Factory", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, @@ -95801,14 +95518,14 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], - "slug": "nebius/fast", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "google/gemma-2-9b-it-fast", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "provider_name": "Nebius", "provider_region": null, - "provider_slug": "nebius/fast", + "provider_slug": "nebius/fp8", "quantization": "fp8", "supported_parameters": [ "max_tokens", @@ -95817,49 +95534,59 @@ "frequency_penalty", "presence_penalty", "top_k", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Gemini", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "google/gemma-2-9b-it", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "gemma", + "instruct_type": null, "model_version_group_id": null, - "name": "Google: Gemma 2 9B", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "google/gemma-2-9b-it", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Gemma 2 9B", - "slug": "google/gemma-2-9b-it", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 110000, - "created_at": "2025-03-12T05:12:39.645813+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["", "", ""], + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-29T16:36:05.687988+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 110000, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -95868,7 +95595,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -95877,8 +95603,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "db3af039-8c6e-4b32-bbf5-c858d644d1b5", + "has_completions": true, + "id": "52dbed07-6ee1-40d4-ae95-7375769ab577", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -95891,60 +95617,50 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", + "author": "qwen", "context_length": 131072, - "created_at": "2025-03-12T05:12:39.645813+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["", "", ""], + "created_at": "2025-07-29T16:36:05.687988+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "Qwen/Qwen3-30B-A3B-Instruct-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Qwen: Qwen3 30B A3B Instruct 2507", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "permaslug": "qwen/qwen3-30b-a3b-instruct-2507", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Qwen3 30B A3B Instruct 2507", + "slug": "qwen/qwen3-30b-a3b-instruct-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3-27b-it", - "model_variant_slug": "google/gemma-3-27b-it", + "model_variant_permaslug": "qwen/qwen3-30b-a3b-instruct-2507", + "model_variant_slug": "qwen/qwen3-30b-a3b-instruct-2507", "moderation_required": false, - "name": "Nebius | google/gemma-3-27b-it", + "name": "Nebius | qwen/qwen3-30b-a3b-instruct-2507", "pricing": { "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -95983,64 +95699,75 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "google/gemma-3-27b-it", + "provider_model_id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "provider_name": "Nebius", "provider_region": null, "provider_slug": "nebius/fp8", "quantization": "fp8", - "supported_parameters": ["max_tokens"], + "supported_parameters": [ + "max_tokens", + "temperature", + "top_p", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", + "tools", + "tool_choice", + "response_format", + "structured_outputs" + ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Gemini", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "Qwen/Qwen3-30B-A3B-Instruct-2507", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Qwen: Qwen3 30B A3B Instruct 2507", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "permaslug": "qwen/qwen3-30b-a3b-instruct-2507", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Qwen3 30B A3B Instruct 2507", + "slug": "qwen/qwen3-30b-a3b-instruct-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2025-02-12T23:01:58.468577+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-08-28T16:39:52.539313+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n", + "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -96049,7 +95776,9 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -96059,7 +95788,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "b2fbbc15-67f2-4b72-94a0-112f1591f295", + "id": "b355ec39-9229-4941-ab24-009c916ea9cc", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -96072,45 +95801,50 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 0, - "created_at": "2025-02-12T23:01:58.468577+00:00", + "author": "qwen", + "context_length": 131072, + "created_at": "2025-08-28T16:39:52.539313+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM – it generates text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 3 was aligned to safeguard against the MLCommons standardized hazards taxonomy and designed to support Llama 3.1 capabilities. Specifically, it provides content moderation in 8 languages, and was optimized to support safety and security for search and code interpreter tool calls.\n", - "features": {}, - "group": "Llama3", + "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-3-8B", + "hf_slug": "Qwen/Qwen3-30B-A3B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "none", + "instruct_type": null, "model_version_group_id": null, - "name": "Llama Guard 3 8B", + "name": "Qwen: Qwen3 30B A3B Thinking 2507", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-3-8b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-30b-a3b-thinking-2507", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama Guard 3 8B", - "slug": "meta-llama/llama-guard-3-8b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 30B A3B Thinking 2507", + "slug": "qwen/qwen3-30b-a3b-thinking-2507", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-guard-3-8b", - "model_variant_slug": "meta-llama/llama-guard-3-8b", + "model_variant_permaslug": "qwen/qwen3-30b-a3b-thinking-2507", + "model_variant_slug": "qwen/qwen3-30b-a3b-thinking-2507", "moderation_required": false, - "name": "Nebius | meta-llama/llama-guard-3-8b", + "name": "Nebius | qwen/qwen3-30b-a3b-thinking-2507", "pricing": { - "completion": "0.00000006", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -96149,53 +95883,77 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Llama-Guard-3-8B", + "provider_model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "provider_name": "Nebius", "provider_region": null, - "provider_slug": "nebius", - "quantization": "unknown", - "supported_parameters": ["max_tokens"], + "provider_slug": "nebius/fp8", + "quantization": "fp8", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "frequency_penalty", + "presence_penalty", + "top_k", + "repetition_penalty", + "tools", + "tool_choice", + "response_format", + "structured_outputs" + ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-3-8B", + "hf_slug": "Qwen/Qwen3-30B-A3B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "none", + "instruct_type": null, "model_version_group_id": null, - "name": "Llama Guard 3 8B", + "name": "Qwen: Qwen3 30B A3B Thinking 2507", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-3-8b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-30b-a3b-thinking-2507", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama Guard 3 8B", - "slug": "meta-llama/llama-guard-3-8b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 30B A3B Thinking 2507", + "slug": "qwen/qwen3-30b-a3b-thinking-2507", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "author": "qwen", + "context_length": 40960, + "created_at": "2025-04-28T21:32:25.189881+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 40960, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -96204,9 +95962,7 @@ "training": false }, "features": { - "supported_parameters": { - "structured_outputs": false - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -96216,7 +95972,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "474a9b4c-3ad1-403a-b84a-763335ae8f61", + "id": "aaa39b6c-e488-491a-91e4-e5fb66f86601", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -96229,45 +95985,48 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", + "author": "qwen", "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "created_at": "2025-04-28T21:32:25.189881+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "" + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "instruct_type": "qwen3", + "model_version_group_id": null, + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-32b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "" + }, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_permaslug": "qwen/qwen3-32b-04-28", + "model_variant_slug": "qwen/qwen3-32b", "moderation_required": false, - "name": "Nebius | meta-llama/llama-3.1-8b-instruct", + "name": "Nebius | qwen/qwen3-32b-04-28", "pricing": { - "completion": "0.00000006", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -96306,16 +96065,18 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], - "slug": "nebius", + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], + "slug": "nebius/base", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "provider_model_id": "Qwen/Qwen3-32B", "provider_name": "Nebius", "provider_region": null, - "provider_slug": "nebius/fp8", + "provider_slug": "nebius/base", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -96329,42 +96090,50 @@ "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "" + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "instruct_type": "qwen3", + "model_version_group_id": null, + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-32b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "" + }, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-31T14:32:59.359308+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -96373,6 +96142,8 @@ "training": false }, "features": { + "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -96382,7 +96153,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5f78cde2-65d7-4e4a-86a2-79f702973039", + "id": "43212978-38b8-48a3-ae45-d0009f224b98", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -96395,45 +96166,50 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", + "author": "qwen", + "context_length": 0, + "created_at": "2025-07-31T14:32:59.359308+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", - "features": {}, - "group": "Llama3", + "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Qwen3 Coder 30B A3B Instruct", + "slug": "qwen/qwen3-coder-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "model_variant_slug": "qwen/qwen3-coder-30b-a3b-instruct", "moderation_required": false, - "name": "Nebius | meta-llama/llama-3.3-70b-instruct", + "name": "Nebius | qwen/qwen3-coder-30b-a3b-instruct", "pricing": { - "completion": "0.0000004", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000013", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -96472,11 +96248,11 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct", + "provider_model_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "provider_name": "Nebius", "provider_region": null, "provider_slug": "nebius/fp8", @@ -96490,7 +96266,9 @@ "top_k", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, @@ -96498,37 +96276,47 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Qwen3 Coder 30B A3B Instruct", + "slug": "qwen/qwen3-coder-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 131072, - "created_at": "2025-07-11T19:47:32.565514+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -96546,7 +96334,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "900353a5-eceb-48bc-91e5-de424b9cecc6", + "id": "abdef935-3e39-46b2-a40e-fb5920f88abb", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -96555,17 +96343,17 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 131072, - "created_at": "2025-07-11T19:47:32.565514+00:00", + "author": "qwen", + "context_length": 1048576, + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "features": { "reasoning_config": { "end_token": null, @@ -96573,41 +96361,36 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0711", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0711", - "slug": "moonshotai/kimi-k2", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2", - "model_variant_slug": "moonshotai/kimi-k2", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "model_variant_slug": "qwen/qwen3-coder", "moderation_required": false, - "name": "Nebius | moonshotai/kimi-k2", + "name": "Nebius | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { - "completion": "0.0000024", + "completion": "0.0000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -96646,15 +96429,15 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "moonshotai/Kimi-K2-Instruct", + "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "provider_name": "Nebius", "provider_region": null, - "provider_slug": "nebius/fp4", - "quantization": "fp4", + "provider_slug": "nebius/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -96664,7 +96447,9 @@ "top_k", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, @@ -96679,32 +96464,32 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0711", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0711", - "slug": "moonshotai/kimi-k2", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "author": "qwen", + "context_length": 32000, + "created_at": "2025-10-28T19:43:42.126124+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -96712,11 +96497,11 @@ }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 32000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -96725,8 +96510,7 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -96736,7 +96520,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5dfd92d5-ca14-4ea6-8667-76e5dd2187d4", + "id": "2b66411e-65b9-4964-b19c-e6ce40752434", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -96745,13 +96529,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "author": "qwen", + "context_length": 32000, + "created_at": "2025-10-28T19:43:42.126124+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -96759,50 +96543,45 @@ }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Other", - "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "has_text_output": false, + "hf_slug": "Qwen/Qwen3-Embedding-8B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", - "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "name": "Qwen: Qwen3 Embedding 8B", + "output_modalities": ["embeddings"], + "permaslug": "qwen/qwen3-embedding-8b", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Embedding 8B", + "slug": "qwen/qwen3-embedding-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "qwen/qwen3-embedding-8b", + "model_variant_slug": "qwen/qwen3-embedding-8b", "moderation_required": false, - "name": "Nebius | moonshotai/kimi-k2-thinking-20251106", + "name": "Nebius | qwen/qwen3-embedding-8b", "pricing": { - "completion": "0.0000025", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000001" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -96841,78 +96620,80 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "moonshotai/Kimi-K2-Thinking", + "provider_model_id": "Qwen/Qwen3-Embedding-8B", "provider_name": "Nebius", "provider_region": null, - "provider_slug": "nebius/fp8", - "quantization": "fp8", + "provider_slug": "nebius", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", "presence_penalty", + "seed", "top_k", - "repetition_penalty", - "tools", - "tool_choice", - "response_format", - "structured_outputs" + "logit_bias", + "logprobs", + "top_logprobs" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Other", - "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "has_text_output": false, + "hf_slug": "Qwen/Qwen3-Embedding-8B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", - "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "name": "Qwen: Qwen3 Embedding 8B", + "output_modalities": ["embeddings"], + "permaslug": "qwen/qwen3-embedding-8b", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Embedding 8B", + "slug": "qwen/qwen3-embedding-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "nousresearch", - "context_length": 131072, - "created_at": "2025-08-26T19:11:03.380181+00:00", - "default_parameters": {}, + "author": "qwen", + "context_length": 128000, + "created_at": "2025-09-11T17:38:04.192907+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with ... traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -96921,10 +96702,7 @@ "training": false }, "features": { - "supported_parameters": { - "structured_outputs": false - }, - "supports_input_audio": false, + "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -96934,7 +96712,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c575da03-bbd8-4d9e-963d-497ea0ffcb13", + "id": "4dc7f3c5-67c9-419a-8d64-0dffa7d3e7cc", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -96947,55 +96725,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nousresearch", - "context_length": 0, - "created_at": "2025-08-26T19:11:03.380181+00:00", - "default_parameters": {}, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:38:04.192907+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with ... traces or respond directly, offering flexibility between speed and depth. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model is instruction-tuned with an expanded post-training corpus (~60B tokens) emphasizing reasoning traces, improving performance in math, code, STEM, and logical reasoning, while retaining broad assistant utility. It also supports structured outputs, including JSON mode, schema adherence, function calling, and tool use. Hermes 4 is trained for steerability, lower refusal rates, and alignment toward neutral, user-directed behavior.", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." + "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-4-405B", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Nous: Hermes 4 405B", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-4-405b", + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." + "system_prompt": null }, "router": null, - "short_name": "Hermes 4 405B", - "slug": "nousresearch/hermes-4-405b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "nousresearch/hermes-4-405b", - "model_variant_slug": "nousresearch/hermes-4-405b", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", "moderation_required": false, - "name": "Nebius | nousresearch/hermes-4-405b", + "name": "Nebius | qwen/qwen3-next-80b-a3b-thinking-2509", "pricing": { - "completion": "0.000003", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -97034,11 +96812,11 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "NousResearch/Hermes-4-405B", + "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "provider_name": "Nebius", "provider_region": null, "provider_slug": "nebius/fp8", @@ -97053,51 +96831,59 @@ "presence_penalty", "top_k", "repetition_penalty", - "response_format" + "tools", + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." + "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-4-405B", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Nous: Hermes 4 405B", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-4-405b", + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." + "system_prompt": null }, "router": null, - "short_name": "Hermes 4 405B", - "slug": "nousresearch/hermes-4-405b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "nousresearch", + "author": "z-ai", "context_length": 131072, - "created_at": "2025-08-26T19:23:02.446988+00:00", - "default_parameters": {}, + "created_at": "2025-07-25T19:22:27.278283+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit ... reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.", + "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, @@ -97110,10 +96896,11 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supported_parameters": { - "structured_outputs": false + "response_format": true, + "structured_outputs": true }, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -97123,7 +96910,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9e1bd3ed-059c-4fcf-9435-c1993ef564db", + "id": "fde64dda-3785-4195-95cb-8f4014802805", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -97136,55 +96923,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nousresearch", + "author": "z-ai", "context_length": 131072, - "created_at": "2025-08-26T19:23:02.446988+00:00", - "default_parameters": {}, + "created_at": "2025-07-25T19:22:27.278283+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either respond directly or generate explicit ... reasoning traces before answering. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThis 70B variant is trained with the expanded post-training corpus (~60B tokens) emphasizing verified reasoning data, leading to improvements in mathematics, coding, STEM, logic, and structured outputs while maintaining general assistant performance. It supports JSON mode, schema adherence, function calling, and tool use, and is designed for greater steerability with reduced refusal rates.", + "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." + "system_prompt": null } }, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-4-70B", + "hf_slug": "zai-org/GLM-4.5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Nous: Hermes 4 70B", + "name": "Z.AI: GLM 4.5", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-4-70b", + "permaslug": "z-ai/glm-4.5", "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." + "system_prompt": null }, "router": null, - "short_name": "Hermes 4 70B", - "slug": "nousresearch/hermes-4-70b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.5", + "slug": "z-ai/glm-4.5", + "updated_at": "2026-01-05T22:04:10.598351+00:00", "warning_message": null }, - "model_variant_permaslug": "nousresearch/hermes-4-70b", - "model_variant_slug": "nousresearch/hermes-4-70b", + "model_variant_permaslug": "z-ai/glm-4.5", + "model_variant_slug": "z-ai/glm-4.5", "moderation_required": false, - "name": "Nebius | nousresearch/hermes-4-70b", + "name": "Nebius | z-ai/glm-4.5", "pricing": { - "completion": "0.0000004", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000013", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -97223,11 +97010,11 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "NousResearch/Hermes-4-70B", + "provider_model_id": "zai-org/GLM-4.5", "provider_name": "Nebius", "provider_region": null, "provider_slug": "nebius/fp8", @@ -97235,6 +97022,8 @@ "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -97242,51 +97031,57 @@ "presence_penalty", "top_k", "repetition_penalty", - "response_format" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." + "system_prompt": null } }, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-4-70B", + "hf_slug": "zai-org/GLM-4.5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Nous: Hermes 4 70B", + "name": "Z.AI: GLM 4.5", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-4-70b", + "permaslug": "z-ai/glm-4.5", "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem." + "system_prompt": null }, "router": null, - "short_name": "Hermes 4 70B", - "slug": "nousresearch/hermes-4-70b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.5", + "slug": "z-ai/glm-4.5", + "updated_at": "2026-01-05T22:04:10.598351+00:00", "warning_message": null }, { - "author": "nvidia", + "author": "z-ai", "context_length": 131072, - "created_at": "2025-04-08T12:24:19.697786+00:00", - "default_parameters": {}, + "created_at": "2025-07-25T19:20:58.066206+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.", + "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, @@ -97299,7 +97094,11 @@ "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -97309,7 +97108,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f4c50341-ced1-4d4f-afe6-8082c3f052bb", + "id": "e16179dc-e37e-4c38-af45-cb1df5773f10", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -97322,53 +97121,54 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nvidia", + "author": "z-ai", "context_length": 131072, - "created_at": "2025-04-08T12:24:19.697786+00:00", - "default_parameters": {}, + "created_at": "2025-07-25T19:20:58.066206+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.", + "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "features": { "reasoning_config": { - "end_token": null, - "start_token": null + "end_token": "", + "start_token": "", + "system_prompt": null } }, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", + "hf_slug": "zai-org/GLM-4.5-Air", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1", + "name": "Z.AI: GLM 4.5 Air", "output_modalities": ["text"], - "permaslug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "permaslug": "z-ai/glm-4.5-air", "reasoning_config": { - "end_token": null, - "start_token": null + "end_token": "", + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "Llama 3.1 Nemotron Ultra 253B v1", - "slug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "short_name": "GLM 4.5 Air", + "slug": "z-ai/glm-4.5-air", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", - "model_variant_slug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "model_variant_permaslug": "z-ai/glm-4.5-air", + "model_variant_slug": "z-ai/glm-4.5-air", "moderation_required": false, - "name": "Nebius | nvidia/llama-3.1-nemotron-ultra-253b-v1", + "name": "Nebius | z-ai/glm-4.5-air", "pricing": { - "completion": "0.0000018", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -97407,11 +97207,11 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", + "provider_model_id": "zai-org/GLM-4.5-Air", "provider_name": "Nebius", "provider_region": null, "provider_slug": "nebius/fp8", @@ -97419,6 +97219,8 @@ "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -97426,58 +97228,60 @@ "presence_penalty", "top_k", "repetition_penalty", - "response_format", - "structured_outputs" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": null, - "start_token": null + "end_token": "", + "start_token": "", + "system_prompt": null } }, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", + "hf_slug": "zai-org/GLM-4.5-Air", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1", + "name": "Z.AI: GLM 4.5 Air", "output_modalities": ["text"], - "permaslug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "permaslug": "z-ai/glm-4.5-air", "reasoning_config": { - "end_token": null, - "start_token": null + "end_token": "", + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "Llama 3.1 Nemotron Ultra 253B v1", - "slug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", + "short_name": "GLM 4.5 Air", + "slug": "z-ai/glm-4.5-air", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "z-ai", + "context_length": 202752, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { "adapter_name": "NebiusAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 202752, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", @@ -97486,16 +97290,17 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true + "literal_none": false, + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "fec8a05d-b2aa-4bbd-adbf-fa688dfa984e", + "id": "c2876732-9e69-4455-b537-663d3637f2af", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -97508,17 +97313,17 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -97527,41 +97332,36 @@ "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "z-ai/glm-4.7-20251222", + "model_variant_slug": "z-ai/glm-4.7", "moderation_required": false, - "name": "Nebius | openai/gpt-oss-120b", + "name": "Nebius | z-ai/glm-4.7-20251222", "pricing": { - "completion": "0.0000006", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, "provider_display_name": "Nebius Token Factory", "provider_info": { @@ -97600,15 +97400,15 @@ "isMultipartSupported": true, "moderationRequired": false, "name": "Nebius", - "owners": ["{}"], + "owners": ["org_38IQyXcgfBXsG7mGiURlUC1vqxL", "user_38ZpKrcXXJGkZ6nXTzP6xo17vp5"], "slug": "nebius", "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-120b", + "provider_model_id": "zai-org/GLM-4.7-FP8", "provider_name": "Nebius", "provider_region": null, - "provider_slug": "nebius/fp4", - "quantization": "fp4", + "provider_slug": "nebius/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", @@ -97638,52 +97438,67 @@ "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null - }, + } + ], + "name": "Nebius", + "slug": "nebius" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "NextBit", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + }, + "models": [ { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 32768, + "created_at": "2025-01-29T23:53:50.865297+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\\n\\nOther benchmark results include:\\n\\n- AIME 2024 pass@1: 72.6\\n- MATH-500 pass@1: 94.3\\n- CodeForces Rating: 1691\\n\\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -97692,8 +97507,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "94e1e794-6992-4a4a-975f-420382a5e5e9", + "has_completions": false, + "id": "cde873b2-4187-40df-953f-c6e04ff7fd19", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -97702,192 +97517,151 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 128000, + "created_at": "2025-01-29T23:53:50.865297+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\\n\\nOther benchmark results include:\\n\\n- AIME 2024 pass@1: 72.6\\n- MATH-500 pass@1: 94.3\\n- CodeForces Rating: 1691\\n\\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "GPT", + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1 Distill Qwen 32B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "deepseek/deepseek-r1-distill-qwen-32b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "R1 Distill Qwen 32B", + "slug": "deepseek/deepseek-r1-distill-qwen-32b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "deepseek/deepseek-r1-distill-qwen-32b", + "model_variant_slug": "deepseek/deepseek-r1-distill-qwen-32b", "moderation_required": false, - "name": "Nebius | openai/gpt-oss-20b", + "name": "NextBit | deepseek/deepseek-r1-distill-qwen-32b", "pricing": { - "completion": "0.0000002", + "completion": "0.00000029", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000029" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-20b", - "provider_name": "Nebius", + "provider_model_id": "deepseek:qwen3-r1-32b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp4", - "quantization": "fp4", + "provider_slug": "nextbit/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice", - "response_format", - "structured_outputs" + "presence_penalty" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "GPT", + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1 Distill Qwen 32B", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "deepseek/deepseek-r1-distill-qwen-32b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "R1 Distill Qwen 32B", + "slug": "deepseek/deepseek-r1-distill-qwen-32b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "prime-intellect", - "context_length": 131072, - "created_at": "2025-11-27T03:02:14.49479+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, - "default_stops": [], + "author": "google", + "context_length": 8192, + "created_at": "2024-07-13T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["", "", ""], "default_system": null, - "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", + "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).\n\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -97897,7 +97671,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "54e0f2d4-d791-4a30-ab77-74efbb8b06f5", + "id": "03a23b91-c1f2-4a5e-891b-df7e13c87472", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -97906,184 +97680,134 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "prime-intellect", - "context_length": 131072, - "created_at": "2025-11-27T03:02:14.49479+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, - "default_stops": [], + "author": "google", + "context_length": 8192, + "created_at": "2024-07-13T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["", "", ""], "default_system": null, - "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).\n\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", + "features": {}, + "group": "Gemini", "has_text_output": true, - "hf_slug": "PrimeIntellect/INTELLECT-3-FP8", + "hf_slug": "google/gemma-2-27b-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "gemma", "model_version_group_id": null, - "name": "Prime Intellect: INTELLECT-3", + "name": "Google: Gemma 2 27B", "output_modalities": ["text"], - "permaslug": "prime-intellect/intellect-3-20251126", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "google/gemma-2-27b-it", + "reasoning_config": null, "router": null, - "short_name": "INTELLECT-3", - "slug": "prime-intellect/intellect-3", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 2 27B", + "slug": "google/gemma-2-27b-it", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "prime-intellect/intellect-3-20251126", - "model_variant_slug": "prime-intellect/intellect-3", + "model_variant_permaslug": "google/gemma-2-27b-it", + "model_variant_slug": "google/gemma-2-27b-it", "moderation_required": false, - "name": "Nebius | prime-intellect/intellect-3-20251126", + "name": "NextBit | google/gemma-2-27b-it", "pricing": { - "completion": "0.0000011", + "completion": "0.00000065", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000065" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "PrimeIntellect/INTELLECT-3", - "provider_name": "Nebius", + "provider_model_id": "gemma-2:27b-it", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", - "quantization": "fp8", + "provider_slug": "nextbit/int4", + "quantization": "int4", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice", "response_format", "structured_outputs" ], - "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_multipart": false, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Gemini", "has_text_output": true, - "hf_slug": "PrimeIntellect/INTELLECT-3-FP8", + "hf_slug": "google/gemma-2-27b-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "gemma", "model_version_group_id": null, - "name": "Prime Intellect: INTELLECT-3", + "name": "Google: Gemma 2 27B", "output_modalities": ["text"], - "permaslug": "prime-intellect/intellect-3-20251126", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "google/gemma-2-27b-it", + "reasoning_config": null, "router": null, - "short_name": "INTELLECT-3", - "slug": "prime-intellect/intellect-3", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Gemma 2 27B", + "slug": "google/gemma-2-27b-it", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32768, - "created_at": "2025-04-15T16:34:47.067646+00:00", + "author": "microsoft", + "context_length": 16384, + "created_at": "2025-01-10T06:17:52.16346+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing. Based on the Qwen2.5 architecture, it incorporates enhancements like RoPE, SwiGLU, RMSNorm, and GQA attention with support for up to 128K tokens using YaRN-based extrapolation. It is trained on a large corpus of source code, synthetic data, and text-code grounding, providing robust performance across programming languages and agentic coding workflows.\n\nThis model is part of the Qwen2.5-Coder family and offers strong compatibility with tools like vLLM for efficient deployment. Released under the Apache 2.0 license.", + "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 32768, + "context_length": 16384, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { "supported_parameters": { - "response_format": true, "structured_outputs": true }, "supports_tool_choice": { @@ -98095,7 +97819,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5c560917-3588-4e4f-8684-79961d0b30f9", + "id": "bfdad85e-1565-4ab4-a4ea-1a4772a11341", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -98104,153 +97828,137 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-15T16:34:47.067646+00:00", + "author": "microsoft", + "context_length": 16384, + "created_at": "2025-01-10T06:17:52.16346+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen2.5-Coder-7B-Instruct is a 7B parameter instruction-tuned language model optimized for code-related tasks such as code generation, reasoning, and bug fixing. Based on the Qwen2.5 architecture, it incorporates enhancements like RoPE, SwiGLU, RMSNorm, and GQA attention with support for up to 128K tokens using YaRN-based extrapolation. It is trained on a large corpus of source code, synthetic data, and text-code grounding, providing robust performance across programming languages and agentic coding workflows.\n\nThis model is part of the Qwen2.5-Coder family and offers strong compatibility with tools like vLLM for efficient deployment. Released under the Apache 2.0 license.", + "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", "features": {}, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-Coder-7B-Instruct", + "hf_slug": "microsoft/phi-4", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 Coder 7B Instruct", + "name": "Microsoft: Phi 4", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-coder-7b-instruct", + "permaslug": "microsoft/phi-4", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 Coder 7B Instruct", - "slug": "qwen/qwen2.5-coder-7b-instruct", + "short_name": "Phi 4", + "slug": "microsoft/phi-4", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen2.5-coder-7b-instruct", - "model_variant_slug": "qwen/qwen2.5-coder-7b-instruct", + "model_variant_permaslug": "microsoft/phi-4", + "model_variant_slug": "microsoft/phi-4", "moderation_required": false, - "name": "Nebius | qwen/qwen2.5-coder-7b-instruct", + "name": "NextBit | microsoft/phi-4", "pricing": { - "completion": "0.00000009", + "completion": "0.00000014", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000006" }, - "provider_display_name": "Nebius AI Studio (Fast)", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius AI Studio (Fast)", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius/fast", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen2.5-Coder-7B-fast", - "provider_name": "Nebius", + "provider_model_id": "microsoft:phi-4", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fast", - "quantization": "fp8", + "provider_slug": "nextbit/int4", + "quantization": "int4", "supported_parameters": [ "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", "presence_penalty", - "top_k", - "repetition_penalty" + "response_format" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-Coder-7B-Instruct", + "hf_slug": "microsoft/phi-4", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 Coder 7B Instruct", + "name": "Microsoft: Phi 4", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-coder-7b-instruct", + "permaslug": "microsoft/phi-4", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 Coder 7B Instruct", - "slug": "qwen/qwen2.5-coder-7b-instruct", + "short_name": "Phi 4", + "slug": "microsoft/phi-4", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32000, - "created_at": "2025-02-01T11:45:11.997326+00:00", + "author": "gryphe", + "context_length": 4096, + "created_at": "2023-07-02T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 32000, + "context_length": 4096, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -98260,7 +97968,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "2fff2f07-5438-4dcd-854c-6a8ca11fd420", + "id": "3a9bbd50-68e9-4ebf-92e4-faa0395a1229", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -98269,142 +97977,137 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-02-01T11:45:11.997326+00:00", + "author": "gryphe", + "context_length": 4096, + "created_at": "2023-07-02T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", "features": {}, - "group": "Qwen", + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "Gryphe/MythoMax-L2-13b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "MythoMax 13B", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "permaslug": "gryphe/mythomax-l2-13b", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", + "short_name": "MythoMax 13B", + "slug": "gryphe/mythomax-l2-13b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", - "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_permaslug": "gryphe/mythomax-l2-13b", + "model_variant_slug": "gryphe/mythomax-l2-13b", "moderation_required": false, - "name": "Nebius | qwen/qwen2.5-vl-72b-instruct", + "name": "NextBit | gryphe/mythomax-l2-13b", "pricing": { - "completion": "0.00000075", + "completion": "0.00000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0.00000006" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen2.5-VL-72B-Instruct", - "provider_name": "Nebius", + "provider_model_id": "mythomax:13b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", - "quantization": "fp8", - "supported_parameters": ["max_tokens"], - "supports_multipart": true, + "provider_slug": "nextbit/int4", + "quantization": "int4", + "supported_parameters": [ + "structured_outputs", + "response_format", + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty" + ], + "supports_multipart": false, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Qwen", + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "Gryphe/MythoMax-L2-13b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "MythoMax 13B", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "permaslug": "gryphe/mythomax-l2-13b", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", + "short_name": "MythoMax 13B", + "slug": "gryphe/mythomax-l2-13b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", + "author": "neversleep", + "context_length": 32768, + "created_at": "2024-09-15T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -98414,7 +98117,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "d115b865-d5c4-416d-b622-72afdef64698", + "id": "12d1e58a-8433-4273-8b0e-1a2dcb742d6f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -98423,171 +98126,130 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", + "author": "neversleep", + "context_length": 131072, + "created_at": "2024-09-15T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "NeverSleep/Lumimaid-v0.2-8B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "NeverSleep: Lumimaid v0.2 8B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "neversleep/llama-3.1-lumimaid-8b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Lumimaid v0.2 8B", + "slug": "neversleep/llama-3.1-lumimaid-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "neversleep/llama-3.1-lumimaid-8b", + "model_variant_slug": "neversleep/llama-3.1-lumimaid-8b", "moderation_required": false, - "name": "Nebius | qwen/qwen3-235b-a22b-07-25", + "name": "NextBit | neversleep/llama-3.1-lumimaid-8b", "pricing": { "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000009" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "provider_name": "Nebius", + "provider_model_id": "lumimaid:llama31-8b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", - "quantization": "fp8", + "provider_slug": "nextbit/int4", + "quantization": "int4", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice", - "response_format", - "structured_outputs" + "presence_penalty" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "NeverSleep/Lumimaid-v0.2-8B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "NeverSleep: Lumimaid v0.2 8B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "neversleep/llama-3.1-lumimaid-8b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Lumimaid v0.2 8B", + "slug": "neversleep/llama-3.1-lumimaid-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-29T16:36:05.687988+00:00", + "author": "neversleep", + "context_length": 4096, + "created_at": "2023-11-26T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", + "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 4096, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { @@ -98600,7 +98262,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "52dbed07-6ee1-40d4-ae95-7375769ab577", + "id": "8d930e6b-bb96-4daf-8aae-549d21e4b3a0", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -98609,177 +98271,133 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-07-29T16:36:05.687988+00:00", + "author": "neversleep", + "context_length": 8192, + "created_at": "2023-11-26T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "hf_slug": "NeverSleep/Noromaid-20b-v0.1.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B Instruct 2507", + "name": "Noromaid 20B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-instruct-2507", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "neversleep/noromaid-20b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 30B A3B Instruct 2507", - "slug": "qwen/qwen3-30b-a3b-instruct-2507", + "short_name": "Noromaid 20B", + "slug": "neversleep/noromaid-20b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-instruct-2507", - "model_variant_slug": "qwen/qwen3-30b-a3b-instruct-2507", + "model_variant_permaslug": "neversleep/noromaid-20b", + "model_variant_slug": "neversleep/noromaid-20b", "moderation_required": false, - "name": "Nebius | qwen/qwen3-30b-a3b-instruct-2507", + "name": "NextBit | neversleep/noromaid-20b", "pricing": { - "completion": "0.0000003", + "completion": "0.00000175", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.000001" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-30B-A3B-Instruct-2507", - "provider_name": "Nebius", + "provider_model_id": "noromaid:20b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", - "quantization": "fp8", + "provider_slug": "nextbit/int4", + "quantization": "int4", "supported_parameters": [ "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice", "response_format", "structured_outputs" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "hf_slug": "NeverSleep/Noromaid-20b-v0.1.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B Instruct 2507", + "name": "Noromaid 20B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-instruct-2507", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "neversleep/noromaid-20b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 30B A3B Instruct 2507", - "slug": "qwen/qwen3-30b-a3b-instruct-2507", + "short_name": "Noromaid 20B", + "slug": "neversleep/noromaid-20b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-08-28T16:39:52.539313+00:00", + "author": "nousresearch", + "context_length": 65536, + "created_at": "2024-08-18T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", + "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 65536, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": {}, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -98789,7 +98407,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "b355ec39-9229-4941-ab24-009c916ea9cc", + "id": "19e60da4-d0e0-4799-a949-ea786279a5cb", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -98798,177 +98416,141 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "nousresearch", "context_length": 131072, - "created_at": "2025-08-28T16:39:52.539313+00:00", + "created_at": "2024-08-18T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "hf_slug": "NousResearch/Hermes-3-Llama-3.1-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B Thinking 2507", + "name": "Nous: Hermes 3 70B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-thinking-2507", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "nousresearch/hermes-3-llama-3.1-70b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 30B A3B Thinking 2507", - "slug": "qwen/qwen3-30b-a3b-thinking-2507", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Hermes 3 70B Instruct", + "slug": "nousresearch/hermes-3-llama-3.1-70b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-thinking-2507", - "model_variant_slug": "qwen/qwen3-30b-a3b-thinking-2507", + "model_variant_permaslug": "nousresearch/hermes-3-llama-3.1-70b", + "model_variant_slug": "nousresearch/hermes-3-llama-3.1-70b", "moderation_required": false, - "name": "Nebius | qwen/qwen3-30b-a3b-thinking-2507", + "name": "NextBit | nousresearch/hermes-3-llama-3.1-70b", "pricing": { "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", - "provider_name": "Nebius", + "provider_model_id": "hermes-3:llama-31-70b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", + "provider_slug": "nextbit/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice", "response_format", "structured_outputs" ], - "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_multipart": false, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "hf_slug": "NousResearch/Hermes-3-Llama-3.1-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B Thinking 2507", + "name": "Nous: Hermes 3 70B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-thinking-2507", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "nousresearch/hermes-3-llama-3.1-70b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 30B A3B Thinking 2507", - "slug": "qwen/qwen3-30b-a3b-thinking-2507", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Hermes 3 70B Instruct", + "slug": "nousresearch/hermes-3-llama-3.1-70b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 40960, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -98978,7 +98560,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "aaa39b6c-e488-491a-91e4-e5fb66f86601", + "id": "63d20b98-aa20-4c49-b27e-f2a78653da3b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -98987,103 +98569,94 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "openai", "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-32b-04-28", - "model_variant_slug": "qwen/qwen3-32b", + "model_variant_permaslug": "openai/gpt-oss-20b", + "model_variant_slug": "openai/gpt-oss-20b", "moderation_required": false, - "name": "Nebius | qwen/qwen3-32b-04-28", + "name": "NextBit | openai/gpt-oss-20b", "pricing": { - "completion": "0.0000003", + "completion": "0.00000045", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius/base", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-32B", - "provider_name": "Nebius", + "provider_model_id": "gpt-oss:20b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/base", + "provider_slug": "nextbit/fp8", "quantization": "fp8", "supported_parameters": [ "reasoning", @@ -99091,70 +98664,74 @@ "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice", "response_format", "structured_outputs" ], - "supports_multipart": true, + "supports_multipart": false, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-31T14:32:59.359308+00:00", + "context_length": 40960, + "created_at": "2025-04-28T21:41:18.320017+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", + "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, programming, and logical inference, and a \"non-thinking\" mode for general-purpose conversation. The model is fine-tuned for instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 40960, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -99164,7 +98741,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "43212978-38b8-48a3-ae45-d0009f224b98", + "id": "c399a914-434e-4f21-b11b-8f4b3b2ec72c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -99173,171 +98750,152 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 40960, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 0, - "created_at": "2025-07-31T14:32:59.359308+00:00", + "context_length": 131702, + "created_at": "2025-04-28T21:41:18.320017+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", + "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, programming, and logical inference, and a \"non-thinking\" mode for general-purpose conversation. The model is fine-tuned for instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.", "features": { "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "hf_slug": "Qwen/Qwen3-14B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 30B A3B Instruct", + "name": "Qwen: Qwen3 14B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "permaslug": "qwen/qwen3-14b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Qwen3 Coder 30B A3B Instruct", - "slug": "qwen/qwen3-coder-30b-a3b-instruct", + "short_name": "Qwen3 14B", + "slug": "qwen/qwen3-14b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-30b-a3b-instruct", - "model_variant_slug": "qwen/qwen3-coder-30b-a3b-instruct", + "model_variant_permaslug": "qwen/qwen3-14b-04-28", + "model_variant_slug": "qwen/qwen3-14b", "moderation_required": false, - "name": "Nebius | qwen/qwen3-coder-30b-a3b-instruct", + "name": "NextBit | qwen/qwen3-14b-04-28", "pricing": { - "completion": "0.0000003", + "completion": "0.00000024", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000006" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", - "provider_name": "Nebius", + "provider_model_id": "qwen3:14b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", - "quantization": "fp8", + "provider_slug": "nextbit/int4", + "quantization": "int4", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice", - "response_format", - "structured_outputs" + "presence_penalty" ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_multipart": false, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "hf_slug": "Qwen/Qwen3-14B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 30B A3B Instruct", + "name": "Qwen: Qwen3 14B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "permaslug": "qwen/qwen3-14b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Qwen3 Coder 30B A3B Instruct", - "slug": "qwen/qwen3-coder-30b-a3b-instruct", + "short_name": "Qwen3 14B", + "slug": "qwen/qwen3-14b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-23T00:29:06+00:00", - "default_parameters": {}, - "default_stops": [], + "context_length": 32768, + "created_at": "2025-04-28T22:16:44.177326+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { @@ -99350,7 +98908,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "abdef935-3e39-46b2-a40e-fb5920f88abb", + "id": "a1a3bc7d-2e7a-4797-8993-4f4f28be5e2d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -99359,179 +98917,165 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 1048576, - "created_at": "2025-07-23T00:29:06+00:00", - "default_parameters": {}, - "default_stops": [], + "context_length": 131072, + "created_at": "2025-04-28T22:16:44.177326+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen3-30B-A3B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen: Qwen3 30B A3B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "qwen/qwen3-30b-a3b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 30B A3B", + "slug": "qwen/qwen3-30b-a3b", + "updated_at": "2026-01-08T19:57:57.475571+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "model_variant_slug": "qwen/qwen3-coder", + "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", + "model_variant_slug": "qwen/qwen3-30b-a3b", "moderation_required": false, - "name": "Nebius | qwen/qwen3-coder-480b-a35b-07-25", + "name": "NextBit | qwen/qwen3-30b-a3b-04-28", "pricing": { - "completion": "0.0000018", + "completion": "0.00000055", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000014" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", - "provider_name": "Nebius", + "provider_model_id": "qwen3:30b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", + "provider_slug": "nextbit/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice", "response_format", "structured_outputs" ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_multipart": false, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen3-30B-A3B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen: Qwen3 30B A3B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "qwen/qwen3-30b-a3b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 30B A3B", + "slug": "qwen/qwen3-30b-a3b", + "updated_at": "2026-01-08T19:57:57.475571+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 32000, - "created_at": "2025-10-28T19:43:42.126124+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "context_length": 32768, + "created_at": "2025-03-05T21:06:54.875499+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", + "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 32000, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { - "supports_input_audio": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -99541,7 +99085,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "2b66411e-65b9-4964-b19c-e6ce40752434", + "id": "33a3c4b0-4a52-488f-a739-56bfe81ae0f5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -99550,185 +99094,155 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32000, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 32000, - "created_at": "2025-10-28T19:43:42.126124+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "context_length": 131072, + "created_at": "2025-03-05T21:06:54.875499+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", + "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "Other", - "has_text_output": false, - "hf_slug": "Qwen/Qwen3-Embedding-8B", + "group": "Qwen", + "has_text_output": true, + "hf_slug": "Qwen/QwQ-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwq", "model_version_group_id": null, - "name": "Qwen: Qwen3 Embedding 8B", - "output_modalities": ["embeddings"], - "permaslug": "qwen/qwen3-embedding-8b", + "name": "Qwen: QwQ 32B", + "output_modalities": ["text"], + "permaslug": "qwen/qwq-32b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Qwen3 Embedding 8B", - "slug": "qwen/qwen3-embedding-8b", + "short_name": "QwQ 32B", + "slug": "qwen/qwq-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-embedding-8b", - "model_variant_slug": "qwen/qwen3-embedding-8b", + "model_variant_permaslug": "qwen/qwq-32b", + "model_variant_slug": "qwen/qwq-32b", "moderation_required": false, - "name": "Nebius | qwen/qwen3-embedding-8b", + "name": "NextBit | qwen/qwq-32b", "pricing": { - "completion": "0", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Embedding-8B", - "provider_name": "Nebius", + "provider_model_id": "qwq:32b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius", - "quantization": "unknown", + "provider_slug": "nextbit/int4", + "quantization": "int4", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "logit_bias", - "logprobs", - "top_logprobs" + "presence_penalty" ], - "supports_multipart": true, - "supports_reasoning": false, + "supports_multipart": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "Other", - "has_text_output": false, - "hf_slug": "Qwen/Qwen3-Embedding-8B", + "group": "Qwen", + "has_text_output": true, + "hf_slug": "Qwen/QwQ-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwq", "model_version_group_id": null, - "name": "Qwen: Qwen3 Embedding 8B", - "output_modalities": ["embeddings"], - "permaslug": "qwen/qwen3-embedding-8b", + "name": "Qwen: QwQ 32B", + "output_modalities": ["text"], + "permaslug": "qwen/qwq-32b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Qwen3 Embedding 8B", - "slug": "qwen/qwen3-embedding-8b", + "short_name": "QwQ 32B", + "slug": "qwen/qwq-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 128000, - "created_at": "2025-09-11T17:38:04.192907+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "undi95", + "context_length": 6144, + "created_at": "2023-07-22T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", + "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 128000, + "context_length": 6144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { - "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -99738,7 +99252,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "4dc7f3c5-67c9-419a-8d64-0dffa7d3e7cc", + "id": "061b3f45-8511-4ad9-8537-a5ac7aeeb163", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -99747,190 +99261,133 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:38:04.192907+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "undi95", + "context_length": 4096, + "created_at": "2023-07-22T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["###", ""], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": "Undi95/ReMM-SLERP-L2-13B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "ReMM SLERP 13B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "undi95/remm-slerp-l2-13b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "ReMM SLERP 13B", + "slug": "undi95/remm-slerp-l2-13b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", + "model_variant_permaslug": "undi95/remm-slerp-l2-13b", + "model_variant_slug": "undi95/remm-slerp-l2-13b", "moderation_required": false, - "name": "Nebius | qwen/qwen3-next-80b-a3b-thinking-2509", + "name": "NextBit | undi95/remm-slerp-l2-13b", "pricing": { - "completion": "0.0000012", + "completion": "0.00000065", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000045" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking", - "provider_name": "Nebius", + "provider_model_id": "remm-slerp:l2-13b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", - "quantization": "fp8", + "provider_slug": "nextbit/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", - "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice", - "response_format", - "structured_outputs" + "presence_penalty" ], - "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_multipart": false, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama2", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": "Undi95/ReMM-SLERP-L2-13B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "alpaca", "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "ReMM SLERP 13B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "undi95/remm-slerp-l2-13b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "ReMM SLERP 13B", + "slug": "undi95/remm-slerp-l2-13b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-07-25T19:22:27.278283+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, - "default_stops": [], + "author": "sao10k", + "context_length": 32768, + "created_at": "2024-08-28T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -99940,7 +99397,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "fde64dda-3785-4195-95cb-8f4014802805", + "id": "8b0ca9ca-37a1-4753-86eb-81e091a083ad", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -99949,190 +99406,133 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", + "author": "sao10k", "context_length": 131072, - "created_at": "2025-07-25T19:22:27.278283+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, - "default_stops": [], + "created_at": "2024-08-28T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5", + "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Z.AI: GLM 4.5", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "sao10k/l3.1-euryale-70b", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5", - "slug": "z-ai/glm-4.5", - "updated_at": "2026-01-05T22:04:10.598351+00:00", + "short_name": "Llama 3.1 Euryale 70B v2.2", + "slug": "sao10k/l3.1-euryale-70b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.5", - "model_variant_slug": "z-ai/glm-4.5", + "model_variant_permaslug": "sao10k/l3.1-euryale-70b", + "model_variant_slug": "sao10k/l3.1-euryale-70b", "moderation_required": false, - "name": "Nebius | z-ai/glm-4.5", + "name": "NextBit | sao10k/l3.1-euryale-70b", "pricing": { - "completion": "0.0000022", + "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000065" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "zai-org/GLM-4.5", - "provider_name": "Nebius", + "provider_model_id": "euryale:31-70b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", + "provider_slug": "nextbit/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice" + "response_format", + "structured_outputs" ], - "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_multipart": false, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5", + "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Z.AI: GLM 4.5", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "sao10k/l3.1-euryale-70b", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5", - "slug": "z-ai/glm-4.5", - "updated_at": "2026-01-05T22:04:10.598351+00:00", + "short_name": "Llama 3.1 Euryale 70B v2.2", + "slug": "sao10k/l3.1-euryale-70b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", + "author": "sao10k", "context_length": 131072, - "created_at": "2025-07-25T19:20:58.066206+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, - "default_stops": [], + "created_at": "2024-12-18T15:32:08.468786+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", "endpoint": { - "adapter_name": "NebiusAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": false, "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -100142,7 +99542,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "e16179dc-e37e-4c38-af45-cb1df5773f10", + "id": "fde3788e-a185-4487-93a7-10fb4412d412", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -100151,187 +99551,129 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-07-25T19:20:58.066206+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, - "default_stops": [], + "author": "sao10k", + "context_length": 8192, + "created_at": "2024-12-18T15:32:08.468786+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5-Air", + "hf_slug": "Sao10K/L3.3-70B-Euryale-v2.3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Z.AI: GLM 4.5 Air", + "name": "Sao10K: Llama 3.3 Euryale 70B", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5-air", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "sao10k/l3.3-euryale-70b-v2.3", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5 Air", - "slug": "z-ai/glm-4.5-air", + "short_name": "Llama 3.3 Euryale 70B", + "slug": "sao10k/l3.3-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.5-air", - "model_variant_slug": "z-ai/glm-4.5-air", + "model_variant_permaslug": "sao10k/l3.3-euryale-70b-v2.3", + "model_variant_slug": "sao10k/l3.3-euryale-70b", "moderation_required": false, - "name": "Nebius | z-ai/glm-4.5-air", + "name": "NextBit | sao10k/l3.3-euryale-70b-v2.3", "pricing": { - "completion": "0.0000012", + "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000065" }, - "provider_display_name": "Nebius Token Factory", + "provider_display_name": "NextBit", "provider_info": { - "adapterName": "NebiusAdapter", - "baseUrl": "https://api.studio.nebius.ai/v1", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.nextbit256.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://docs.nebius.com/legal/studio/privacy/", + "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://docs.nebius.com/legal/studio/terms-of-use/", + "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", "training": false }, - "displayName": "Nebius Token Factory", + "displayName": "NextBit", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "NL", "icon": { - "className": "invert-0 dark:invert", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://docs.nebius.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" }, - "ignoredProviderModels": [ - "Qwen/Qwen3-Embedding-8B", - "BAAI/bge-en-icl", - "BAAI/bge-multilingual-gemma2", - "intfloat/e5-mistral-7b-instruct", - "Qwen/Qwen3-235B-A22B-Thinking-2507", - "black-forest-labs/flux-dev", - "black-forest-labs/flux-schnell", - "google/gemma-2-2b-it", - "NousResearch/Hermes-4-70B", - "NousResearch/Hermes-4-405B" - ], + "ignoredProviderModels": [], "isAbortable": false, - "isMultipartSupported": true, + "isMultipartSupported": false, "moderationRequired": false, - "name": "Nebius", + "name": "NextBit", "owners": ["{}"], - "slug": "nebius", + "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "zai-org/GLM-4.5-Air", - "provider_name": "Nebius", + "provider_model_id": "euryale:33-70b", + "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nebius/fp8", - "quantization": "fp8", + "provider_slug": "nextbit/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", + "stop", "frequency_penalty", "presence_penalty", - "top_k", - "repetition_penalty", - "tools", - "tool_choice" + "response_format", + "structured_outputs" ], - "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_multipart": false, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5-Air", + "hf_slug": "Sao10K/L3.3-70B-Euryale-v2.3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Z.AI: GLM 4.5 Air", + "name": "Sao10K: Llama 3.3 Euryale 70B", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5-air", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "sao10k/l3.3-euryale-70b-v2.3", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5 Air", - "slug": "z-ai/glm-4.5-air", + "short_name": "Llama 3.3 Euryale 70B", + "slug": "sao10k/l3.3-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Nebius", - "slug": "nebius" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "NextBit", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" - }, - "models": [ + }, { - "author": "deepseek", - "context_length": 32768, - "created_at": "2025-01-29T23:53:50.865297+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "thedrummer", + "context_length": 131072, + "created_at": "2025-09-27T00:11:18.116138+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\\n\\nOther benchmark results include:\\n\\n- AIME 2024 pass@1: 72.6\\n- MATH-500 pass@1: 94.3\\n- CodeForces Rating: 1691\\n\\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", @@ -100340,10 +99682,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -100352,8 +99690,8 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "cde873b2-4187-40df-953f-c6e04ff7fd19", + "has_completions": true, + "id": "9d5db8f1-2899-4304-8f79-09212049ad13", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -100362,57 +99700,58 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 128000, - "created_at": "2025-01-29T23:53:50.865297+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "thedrummer", + "context_length": 131072, + "created_at": "2025-09-27T00:11:18.116138+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\\n\\nOther benchmark results include:\\n\\n- AIME 2024 pass@1: 72.6\\n- MATH-500 pass@1: 94.3\\n- CodeForces Rating: 1691\\n\\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "hf_slug": "thedrummer/cydonia-24b-v4.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Qwen 32B", + "instruct_type": null, + "model_version_group_id": null, + "name": "TheDrummer: Cydonia 24B V4.1", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-qwen-32b", + "permaslug": "thedrummer/cydonia-24b-v4.1", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Qwen 32B", - "slug": "deepseek/deepseek-r1-distill-qwen-32b", + "short_name": "Cydonia 24B V4.1", + "slug": "thedrummer/cydonia-24b-v4.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-distill-qwen-32b", - "model_variant_slug": "deepseek/deepseek-r1-distill-qwen-32b", + "model_variant_permaslug": "thedrummer/cydonia-24b-v4.1", + "model_variant_slug": "thedrummer/cydonia-24b-v4.1", "moderation_required": false, - "name": "NextBit | deepseek/deepseek-r1-distill-qwen-32b", + "name": "NextBit | thedrummer/cydonia-24b-v4.1", "pricing": { - "completion": "0.00000029", + "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000029", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "NextBit", "provider_info": { @@ -100442,68 +99781,68 @@ "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "deepseek:qwen3-r1-32b", + "provider_model_id": "cydonia:24b", "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nextbit/fp8", - "quantization": "fp8", + "provider_slug": "nextbit/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", - "presence_penalty" + "presence_penalty", + "response_format", + "structured_outputs" ], "supports_multipart": false, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "hf_slug": "thedrummer/cydonia-24b-v4.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Qwen 32B", + "instruct_type": null, + "model_version_group_id": null, + "name": "TheDrummer: Cydonia 24B V4.1", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-qwen-32b", + "permaslug": "thedrummer/cydonia-24b-v4.1", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Qwen 32B", - "slug": "deepseek/deepseek-r1-distill-qwen-32b", + "short_name": "Cydonia 24B V4.1", + "slug": "thedrummer/cydonia-24b-v4.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 8192, - "created_at": "2024-07-13T00:00:00+00:00", + "author": "thedrummer", + "context_length": 32768, + "created_at": "2024-09-30T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["", "", ""], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).\n\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", + "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 8192, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", @@ -100512,6 +99851,10 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -100521,7 +99864,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "03a23b91-c1f2-4a5e-891b-df7e13c87472", + "id": "94c3af54-1ed5-400c-affb-570e2935d725", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -100530,49 +99873,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 8192, - "created_at": "2024-07-13T00:00:00+00:00", + "author": "thedrummer", + "context_length": 32768, + "created_at": "2024-09-30T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["", "", ""], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini).\n\nGemma models are well-suited for a variety of text generation tasks, including question answering, summarization, and reasoning.\n\nSee the [launch announcement](https://blog.google/technology/developers/google-gemma-2/) for more details. Usage of Gemma is subject to Google's [Gemma Terms of Use](https://ai.google.dev/gemma/terms).", + "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", "features": {}, - "group": "Gemini", + "group": "Qwen", "has_text_output": true, - "hf_slug": "google/gemma-2-27b-it", + "hf_slug": "TheDrummer/Rocinante-12B-v1.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "gemma", + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Google: Gemma 2 27B", + "name": "TheDrummer: Rocinante 12B", "output_modalities": ["text"], - "permaslug": "google/gemma-2-27b-it", + "permaslug": "thedrummer/rocinante-12b", "reasoning_config": null, "router": null, - "short_name": "Gemma 2 27B", - "slug": "google/gemma-2-27b-it", + "short_name": "Rocinante 12B", + "slug": "thedrummer/rocinante-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-2-27b-it", - "model_variant_slug": "google/gemma-2-27b-it", + "model_variant_permaslug": "thedrummer/rocinante-12b", + "model_variant_slug": "thedrummer/rocinante-12b", "moderation_required": false, - "name": "NextBit | google/gemma-2-27b-it", + "name": "NextBit | thedrummer/rocinante-12b", "pricing": { - "completion": "0.00000065", + "completion": "0.00000043", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000065", - "request": "0", - "web_search": "0" + "prompt": "0.00000017" }, "provider_display_name": "NextBit", "provider_info": { @@ -100602,58 +99940,60 @@ "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "gemma-2:27b-it", + "provider_model_id": "rocinante:12b", "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nextbit/int4", - "quantization": "int4", + "provider_slug": "nextbit/bf16", + "quantization": "bf16", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "response_format", - "structured_outputs" + "tools", + "tool_choice" ], "supports_multipart": false, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Gemini", + "group": "Qwen", "has_text_output": true, - "hf_slug": "google/gemma-2-27b-it", + "hf_slug": "TheDrummer/Rocinante-12B-v1.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "gemma", + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Google: Gemma 2 27B", + "name": "TheDrummer: Rocinante 12B", "output_modalities": ["text"], - "permaslug": "google/gemma-2-27b-it", + "permaslug": "thedrummer/rocinante-12b", "reasoning_config": null, "router": null, - "short_name": "Gemma 2 27B", - "slug": "google/gemma-2-27b-it", + "short_name": "Rocinante 12B", + "slug": "thedrummer/rocinante-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "microsoft", - "context_length": 16384, - "created_at": "2025-01-10T06:17:52.16346+00:00", + "author": "thedrummer", + "context_length": 32768, + "created_at": "2024-11-08T22:04:08.359811+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["[INST]", ""], "default_system": null, - "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", + "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 16384, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", @@ -100662,9 +100002,6 @@ "training": false }, "features": { - "supported_parameters": { - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -100674,7 +100011,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "bfdad85e-1565-4ab4-a4ea-1a4772a11341", + "id": "08cb2fef-fad6-4a41-b292-b2d79f782a08", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -100683,49 +100020,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "microsoft", - "context_length": 16384, - "created_at": "2025-01-10T06:17:52.16346+00:00", + "author": "thedrummer", + "context_length": 32000, + "created_at": "2024-11-08T22:04:08.359811+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["[INST]", ""], "default_system": null, - "description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. \n\nAt 14 billion parameters, it was trained on a mix of high-quality synthetic datasets, data from curated websites, and academic materials. It has undergone careful improvement to follow instructions accurately and maintain strong safety standards. It works best with English language inputs.\n\nFor more information, please see [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)\n", + "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", "features": {}, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": "microsoft/phi-4", + "hf_slug": "TheDrummer/UnslopNemo-12B-v4.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Microsoft: Phi 4", + "name": "TheDrummer: UnslopNemo 12B", "output_modalities": ["text"], - "permaslug": "microsoft/phi-4", + "permaslug": "thedrummer/unslopnemo-12b", "reasoning_config": null, "router": null, - "short_name": "Phi 4", - "slug": "microsoft/phi-4", + "short_name": "UnslopNemo 12B", + "slug": "thedrummer/unslopnemo-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "microsoft/phi-4", - "model_variant_slug": "microsoft/phi-4", + "model_variant_permaslug": "thedrummer/unslopnemo-12b", + "model_variant_slug": "thedrummer/unslopnemo-12b", "moderation_required": false, - "name": "NextBit | microsoft/phi-4", + "name": "NextBit | thedrummer/unslopnemo-12b", "pricing": { - "completion": "0.00000014", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, "provider_display_name": "NextBit", "provider_info": { @@ -100755,58 +100087,64 @@ "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "microsoft:phi-4", + "provider_model_id": "unslopnemo:12b", "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nextbit/int4", - "quantization": "int4", + "provider_slug": "nextbit/fp8", + "quantization": "fp8", "supported_parameters": [ - "structured_outputs", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "response_format" + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": false, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Other", + "group": "Mistral", "has_text_output": true, - "hf_slug": "microsoft/phi-4", + "hf_slug": "TheDrummer/UnslopNemo-12B-v4.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Microsoft: Phi 4", + "name": "TheDrummer: UnslopNemo 12B", "output_modalities": ["text"], - "permaslug": "microsoft/phi-4", + "permaslug": "thedrummer/unslopnemo-12b", "reasoning_config": null, "router": null, - "short_name": "Phi 4", - "slug": "microsoft/phi-4", + "short_name": "UnslopNemo 12B", + "slug": "thedrummer/unslopnemo-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "gryphe", - "context_length": 4096, - "created_at": "2023-07-02T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "z-ai", + "context_length": 202752, + "created_at": "2025-12-22T04:33:34.884504+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": false, - "context_length": 4096, + "context_length": 202752, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", @@ -100815,10 +100153,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -100828,7 +100163,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3a9bbd50-68e9-4ebf-92e4-faa0395a1229", + "id": "8879a789-582e-44fb-ab26-6f810c3e38ea", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -100837,49 +100172,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 202752, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "gryphe", - "context_length": 4096, - "created_at": "2023-07-02T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", - "features": {}, - "group": "Llama2", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Gryphe/MythoMax-L2-13b", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "MythoMax 13B", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "gryphe/mythomax-l2-13b", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-20251222", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "MythoMax 13B", - "slug": "gryphe/mythomax-l2-13b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, - "model_variant_permaslug": "gryphe/mythomax-l2-13b", - "model_variant_slug": "gryphe/mythomax-l2-13b", + "model_variant_permaslug": "z-ai/glm-4.7-20251222", + "model_variant_slug": "z-ai/glm-4.7", "moderation_required": false, - "name": "NextBit | gryphe/mythomax-l2-13b", + "name": "NextBit | z-ai/glm-4.7-20251222", "pricing": { - "completion": "0.00000006", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "NextBit", "provider_info": { @@ -100909,70 +100254,105 @@ "slug": "nextbit", "statusPageUrl": null }, - "provider_model_id": "mythomax:13b", + "provider_model_id": "glm:4-7", "provider_name": "NextBit", "provider_region": null, - "provider_slug": "nextbit/int4", - "quantization": "int4", + "provider_slug": "nextbit/fp4", + "quantization": "fp4", "supported_parameters": [ - "structured_outputs", - "response_format", + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", - "presence_penalty" + "presence_penalty", + "response_format", + "structured_outputs", + "tools" ], "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama2", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Gryphe/MythoMax-L2-13b", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "MythoMax 13B", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "gryphe/mythomax-l2-13b", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-20251222", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "MythoMax 13B", - "slug": "gryphe/mythomax-l2-13b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null - }, + } + ], + "name": "NextBit", + "slug": "nextbit" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "NovitaAI", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + }, + "models": [ { - "author": "neversleep", - "context_length": 32768, - "created_at": "2024-09-15T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "baidu", + "context_length": 120000, + "created_at": "2025-08-12T21:29:27.753118+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.8 + }, + "default_stops": [], "default_system": null, - "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 32768, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 120000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { "supported_parameters": { - "response_format": true, - "structured_outputs": true + "response_format": false, + "structured_outputs": false }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -100982,7 +100362,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "12d1e58a-8433-4273-8b0e-1a2dcb742d6f", + "id": "83e51832-4cf9-4bcb-ad50-10e150d48b86", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -100991,138 +100371,188 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 8000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "neversleep", + "author": "baidu", "context_length": 131072, - "created_at": "2024-09-15T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "created_at": "2025-08-12T21:29:27.753118+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.8 + }, + "default_stops": [], "default_system": null, - "description": "Lumimaid v0.2 8B is a finetune of [Llama 3.1 8B](/models/meta-llama/llama-3.1-8b-instruct) with a \"HUGE step up dataset wise\" compared to Lumimaid v0.1. Sloppy chats output were purged.\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "NeverSleep/Lumimaid-v0.2-8B", + "hf_slug": "baidu/ERNIE-4.5-21B-A3B-PT", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "NeverSleep: Lumimaid v0.2 8B", + "name": "Baidu: ERNIE 4.5 21B A3B", "output_modalities": ["text"], - "permaslug": "neversleep/llama-3.1-lumimaid-8b", - "reasoning_config": null, + "permaslug": "baidu/ernie-4.5-21b-a3b", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Lumimaid v0.2 8B", - "slug": "neversleep/llama-3.1-lumimaid-8b", + "short_name": "ERNIE 4.5 21B A3B", + "slug": "baidu/ernie-4.5-21b-a3b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "neversleep/llama-3.1-lumimaid-8b", - "model_variant_slug": "neversleep/llama-3.1-lumimaid-8b", + "model_variant_permaslug": "baidu/ernie-4.5-21b-a3b", + "model_variant_slug": "baidu/ernie-4.5-21b-a3b", "moderation_required": false, - "name": "NextBit | neversleep/llama-3.1-lumimaid-8b", + "name": "Novita | baidu/ernie-4.5-21b-a3b", "pricing": { - "completion": "0.0000006", + "completion": "0.00000028", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000007" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "lumimaid:llama31-8b", - "provider_name": "NextBit", - "provider_region": null, - "provider_slug": "nextbit/int4", - "quantization": "int4", + "provider_model_id": "baidu/ernie-4.5-21B-a3b", + "provider_name": "Novita", + "provider_region": null, + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", - "presence_penalty" + "presence_penalty", + "seed", + "top_k", + "repetition_penalty", + "tools", + "tool_choice" ], - "supports_multipart": false, + "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "NeverSleep/Lumimaid-v0.2-8B", + "hf_slug": "baidu/ERNIE-4.5-21B-A3B-PT", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "NeverSleep: Lumimaid v0.2 8B", + "name": "Baidu: ERNIE 4.5 21B A3B", "output_modalities": ["text"], - "permaslug": "neversleep/llama-3.1-lumimaid-8b", - "reasoning_config": null, + "permaslug": "baidu/ernie-4.5-21b-a3b", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Lumimaid v0.2 8B", - "slug": "neversleep/llama-3.1-lumimaid-8b", + "short_name": "ERNIE 4.5 21B A3B", + "slug": "baidu/ernie-4.5-21b-a3b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "neversleep", - "context_length": 4096, - "created_at": "2023-11-26T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "baidu", + "context_length": 131072, + "created_at": "2025-10-09T22:28:07.216653+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored", + "description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 4096, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -101132,7 +100562,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8d930e6b-bb96-4daf-8aae-549d21e4b3a0", + "id": "af4c86e1-0b7f-4a10-a655-eed49e3a1054", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -101141,138 +100571,180 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "neversleep", - "context_length": 8192, - "created_at": "2023-11-26T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "baidu", + "context_length": 131072, + "created_at": "2025-10-09T22:28:07.216653+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "A collab between IkariDev and Undi. This merge is suitable for RP, ERP, and general knowledge.\n\n#merge #uncensored", - "features": {}, - "group": "Llama2", + "description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "NeverSleep/Noromaid-20b-v0.1.1", + "hf_slug": "baidu/ERNIE-4.5-21B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "Noromaid 20B", + "name": "Baidu: ERNIE 4.5 21B A3B Thinking", "output_modalities": ["text"], - "permaslug": "neversleep/noromaid-20b", - "reasoning_config": null, + "permaslug": "baidu/ernie-4.5-21b-a3b-thinking", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Noromaid 20B", - "slug": "neversleep/noromaid-20b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "ERNIE 4.5 21B A3B Thinking", + "slug": "baidu/ernie-4.5-21b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "neversleep/noromaid-20b", - "model_variant_slug": "neversleep/noromaid-20b", + "model_variant_permaslug": "baidu/ernie-4.5-21b-a3b-thinking", + "model_variant_slug": "baidu/ernie-4.5-21b-a3b-thinking", "moderation_required": false, - "name": "NextBit | neversleep/noromaid-20b", + "name": "Novita | baidu/ernie-4.5-21b-a3b-thinking", "pricing": { - "completion": "0.00000175", + "completion": "0.00000028", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000007" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "noromaid:20b", - "provider_name": "NextBit", + "provider_model_id": "baidu/ernie-4.5-21B-a3b-thinking", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/int4", - "quantization": "int4", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "response_format", - "structured_outputs" + "seed", + "top_k", + "repetition_penalty" ], - "supports_multipart": false, - "supports_reasoning": false, + "supports_multipart": true, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama2", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "NeverSleep/Noromaid-20b-v0.1.1", + "hf_slug": "baidu/ERNIE-4.5-21B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": null, "model_version_group_id": null, - "name": "Noromaid 20B", + "name": "Baidu: ERNIE 4.5 21B A3B Thinking", "output_modalities": ["text"], - "permaslug": "neversleep/noromaid-20b", - "reasoning_config": null, + "permaslug": "baidu/ernie-4.5-21b-a3b-thinking", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Noromaid 20B", - "slug": "neversleep/noromaid-20b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "ERNIE 4.5 21B A3B Thinking", + "slug": "baidu/ernie-4.5-21b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "nousresearch", - "context_length": 65536, - "created_at": "2024-08-18T00:00:00+00:00", + "author": "baidu", + "context_length": 123000, + "created_at": "2025-06-30T16:15:39.588489+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", + "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 65536, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 123000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -101282,7 +100754,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "19e60da4-d0e0-4799-a949-ea786279a5cb", + "id": "0ca3fe58-73fa-4687-a6b4-73c0657fce71", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -101291,83 +100763,107 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 12000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nousresearch", + "author": "baidu", "context_length": 131072, - "created_at": "2024-08-18T00:00:00+00:00", + "created_at": "2025-06-30T16:15:39.588489+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 70B is a competitive, if not superior finetune of the [Llama-3.1 70B foundation model](/models/meta-llama/llama-3.1-70b-instruct), focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.", - "features": {}, - "group": "Llama3", + "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-3-Llama-3.1-70B", + "hf_slug": "baidu/ERNIE-4.5-300B-A47B-PT", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Nous: Hermes 3 70B Instruct", + "name": "Baidu: ERNIE 4.5 300B A47B ", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-3-llama-3.1-70b", - "reasoning_config": null, + "permaslug": "baidu/ernie-4.5-300b-a47b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Hermes 3 70B Instruct", - "slug": "nousresearch/hermes-3-llama-3.1-70b", + "short_name": "ERNIE 4.5 300B A47B ", + "slug": "baidu/ernie-4.5-300b-a47b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nousresearch/hermes-3-llama-3.1-70b", - "model_variant_slug": "nousresearch/hermes-3-llama-3.1-70b", + "model_variant_permaslug": "baidu/ernie-4.5-300b-a47b", + "model_variant_slug": "baidu/ernie-4.5-300b-a47b", "moderation_required": false, - "name": "NextBit | nousresearch/hermes-3-llama-3.1-70b", + "name": "Novita | baidu/ernie-4.5-300b-a47b", "pricing": { - "completion": "0.0000003", + "completion": "0.0000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000028" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "hermes-3:llama-31-70b", - "provider_name": "NextBit", + "provider_model_id": "baidu/ernie-4.5-300b-a47b-paddle", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", @@ -101375,58 +100871,69 @@ "stop", "frequency_penalty", "presence_penalty", + "seed", + "top_k", + "repetition_penalty", "response_format", "structured_outputs" ], - "supports_multipart": false, + "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-3-Llama-3.1-70B", + "hf_slug": "baidu/ERNIE-4.5-300B-A47B-PT", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Nous: Hermes 3 70B Instruct", + "name": "Baidu: ERNIE 4.5 300B A47B ", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-3-llama-3.1-70b", - "reasoning_config": null, + "permaslug": "baidu/ernie-4.5-300b-a47b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Hermes 3 70B Instruct", - "slug": "nousresearch/hermes-3-llama-3.1-70b", + "short_name": "ERNIE 4.5 300B A47B ", + "slug": "baidu/ernie-4.5-300b-a47b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "nousresearch", - "context_length": 8192, - "created_at": "2024-05-27T00:00:00+00:00", + "author": "baidu", + "context_length": 30000, + "created_at": "2025-08-12T21:07:16.565993+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", + "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 8192, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 30000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -101436,7 +100943,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "659eebd9-4f44-478d-b086-027d92bce33b", + "id": "75e64609-6b51-4e35-96ea-5065c6fbda63", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -101445,139 +100952,174 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": 8000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nousresearch", - "context_length": 8192, - "created_at": "2024-05-27T00:00:00+00:00", + "author": "baidu", + "context_length": 131072, + "created_at": "2025-08-12T21:07:16.565993+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", - "features": {}, - "group": "Llama3", + "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-2-Pro-Llama-3-8B", + "hf_slug": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", + "name": "Baidu: ERNIE 4.5 VL 28B A3B", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-2-pro-llama-3-8b", - "reasoning_config": null, + "permaslug": "baidu/ernie-4.5-vl-28b-a3b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Hermes 2 Pro - Llama-3 8B", - "slug": "nousresearch/hermes-2-pro-llama-3-8b", + "short_name": "ERNIE 4.5 VL 28B A3B", + "slug": "baidu/ernie-4.5-vl-28b-a3b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nousresearch/hermes-2-pro-llama-3-8b", - "model_variant_slug": "nousresearch/hermes-2-pro-llama-3-8b", + "model_variant_permaslug": "baidu/ernie-4.5-vl-28b-a3b", + "model_variant_slug": "baidu/ernie-4.5-vl-28b-a3b", "moderation_required": false, - "name": "NextBit | nousresearch/hermes-2-pro-llama-3-8b", + "name": "Novita | baidu/ernie-4.5-vl-28b-a3b", "pricing": { - "completion": "0.00000008", + "completion": "0.00000056", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000025", - "request": "0", - "web_search": "0" + "prompt": "0.00000014" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "hermes-2:pro-llama-3-8b", - "provider_name": "NextBit", + "provider_model_id": "baidu/ernie-4.5-vl-28b-a3b", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/int4", - "quantization": "int4", + "provider_slug": "novita/fp16", + "quantization": "fp16", "supported_parameters": [ - "structured_outputs", - "response_format", + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", - "presence_penalty" + "presence_penalty", + "seed", + "top_k", + "repetition_penalty", + "tools", + "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-2-Pro-Llama-3-8B", + "hf_slug": "baidu/ERNIE-4.5-VL-28B-A3B-PT", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", + "name": "Baidu: ERNIE 4.5 VL 28B A3B", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-2-pro-llama-3-8b", - "reasoning_config": null, + "permaslug": "baidu/ernie-4.5-vl-28b-a3b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Hermes 2 Pro - Llama-3 8B", - "slug": "nousresearch/hermes-2-pro-llama-3-8b", + "short_name": "ERNIE 4.5 VL 28B A3B", + "slug": "baidu/ernie-4.5-vl-28b-a3b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "baidu", + "context_length": 123000, + "created_at": "2025-06-30T16:28:23.022047+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 131072, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 123000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { @@ -101585,6 +101127,7 @@ "response_format": false, "structured_outputs": false }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -101594,7 +101137,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "63d20b98-aa20-4c49-b27e-f2a78653da3b", + "id": "1fe59382-263d-4107-a768-d7d2007bd3b6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -101603,100 +101146,107 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", + "author": "baidu", "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "created_at": "2025-06-30T16:28:23.022047+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "baidu/ERNIE-4.5-VL-424B-A47B-PT", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Baidu: ERNIE 4.5 VL 424B A47B ", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "baidu/ernie-4.5-vl-424b-a47b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "ERNIE 4.5 VL 424B A47B ", + "slug": "baidu/ernie-4.5-vl-424b-a47b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "baidu/ernie-4.5-vl-424b-a47b", + "model_variant_slug": "baidu/ernie-4.5-vl-424b-a47b", "moderation_required": false, - "name": "NextBit | openai/gpt-oss-20b", + "name": "Novita | baidu/ernie-4.5-vl-424b-a47b", "pricing": { - "completion": "0.00000045", + "completion": "0.00000125", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000042" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "gpt-oss:20b", - "provider_name": "NextBit", + "provider_model_id": "baidu/ernie-4.5-vl-424b-a47b", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/fp8", - "quantization": "fp8", + "provider_slug": "novita/fp16", + "quantization": "fp16", "supported_parameters": [ "reasoning", "include_reasoning", @@ -101706,71 +101256,66 @@ "stop", "frequency_penalty", "presence_penalty", - "response_format", - "structured_outputs" + "seed", + "top_k", + "repetition_penalty" ], - "supports_multipart": false, + "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "baidu/ERNIE-4.5-VL-424B-A47B-PT", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Baidu: ERNIE 4.5 VL 424B A47B ", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "baidu/ernie-4.5-vl-424b-a47b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "ERNIE 4.5 VL 424B A47B ", + "slug": "baidu/ernie-4.5-vl-424b-a47b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T21:41:18.320017+00:00", + "author": "deepseek-ai", + "context_length": 64000, + "created_at": "2024-12-26T19:28:40.559917+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, programming, and logical inference, and a \"non-thinking\" mode for general-purpose conversation. The model is fine-tuned for instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.", + "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 40960, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 64000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -101780,7 +101325,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c399a914-434e-4f21-b11b-8f4b3b2ec72c", + "id": "cefb5a1e-9fea-4496-9a64-4e1b11a7cc8c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -101789,160 +101334,156 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131702, - "created_at": "2025-04-28T21:41:18.320017+00:00", + "author": "deepseek-ai", + "context_length": 131072, + "created_at": "2024-12-26T19:28:40.559917+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, programming, and logical inference, and a \"non-thinking\" mode for general-purpose conversation. The model is fine-tuned for instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen3", + "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", + "features": {}, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-14B", + "hf_slug": "deepseek-ai/DeepSeek-V3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 14B", + "name": "DeepSeek: DeepSeek V3", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-14b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "deepseek/deepseek-chat-v3", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 14B", - "slug": "qwen/qwen3-14b", + "short_name": "DeepSeek V3", + "slug": "deepseek/deepseek-chat", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-14b-04-28", - "model_variant_slug": "qwen/qwen3-14b", + "model_variant_permaslug": "deepseek/deepseek-chat-v3", + "model_variant_slug": "deepseek/deepseek-chat", "moderation_required": false, - "name": "NextBit | qwen/qwen3-14b-04-28", + "name": "Novita | deepseek/deepseek-chat-v3", "pricing": { - "completion": "0.00000024", + "completion": "0.0000013", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen3:14b", - "provider_name": "NextBit", + "provider_model_id": "deepseek/deepseek-v3-turbo", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/int4", - "quantization": "int4", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", - "presence_penalty" + "presence_penalty", + "seed", + "top_k", + "repetition_penalty", + "tools", + "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen3", + "features": {}, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-14B", + "hf_slug": "deepseek-ai/DeepSeek-V3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 14B", + "name": "DeepSeek: DeepSeek V3", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-14b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "deepseek/deepseek-chat-v3", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 14B", - "slug": "qwen/qwen3-14b", + "short_name": "DeepSeek V3", + "slug": "deepseek/deepseek-chat", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32768, - "created_at": "2025-04-28T22:16:44.177326+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 32768, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -101952,7 +101493,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "a1a3bc7d-2e7a-4797-8993-4f4f28be5e2d", + "id": "3a816eb8-7a0b-4a9a-b497-e3a1efb349fc", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -101961,170 +101502,160 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 163840, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "deepseek", "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "features": {}, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", - "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "instruct_type": null, + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", - "model_variant_slug": "qwen/qwen3-30b-a3b", + "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", + "model_variant_slug": "deepseek/deepseek-chat-v3-0324", "moderation_required": false, - "name": "NextBit | qwen/qwen3-30b-a3b-04-28", + "name": "Novita | deepseek/deepseek-chat-v3-0324", "pricing": { - "completion": "0.00000055", + "completion": "0.00000112", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000014", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000135", + "prompt": "0.00000027" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen3:30b", - "provider_name": "NextBit", + "provider_model_id": "deepseek/deepseek-v3-0324", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/fp8", + "provider_slug": "novita/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", + "seed", + "top_k", + "repetition_penalty", + "tools", + "tool_choice", "response_format", "structured_outputs" ], - "supports_multipart": false, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", - "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "instruct_type": null, + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32768, - "created_at": "2025-03-05T21:06:54.875499+00:00", + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-08-21T12:33:48+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 32768, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -102134,169 +101665,193 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "33a3c4b0-4a52-488f-a739-56bfe81ae0f5", + "id": "715d05e1-9fa7-4ceb-834a-833f5e26a55a", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 200, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "deepseek", "context_length": 131072, - "created_at": "2025-03-05T21:06:54.875499+00:00", + "created_at": "2025-08-21T12:33:48+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", "features": { "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "Qwen", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Qwen/QwQ-32B", + "hf_slug": "deepseek-ai/DeepSeek-V3.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwq", + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "Qwen: QwQ 32B", + "name": "DeepSeek: DeepSeek V3.1", "output_modalities": ["text"], - "permaslug": "qwen/qwq-32b", + "permaslug": "deepseek/deepseek-chat-v3.1", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "QwQ 32B", - "slug": "qwen/qwq-32b", + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwq-32b", - "model_variant_slug": "qwen/qwq-32b", + "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", + "model_variant_slug": "deepseek/deepseek-chat-v3.1", "moderation_required": false, - "name": "NextBit | qwen/qwq-32b", + "name": "Novita | deepseek/deepseek-chat-v3.1", "pricing": { - "completion": "0.0000004", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000135", + "prompt": "0.00000027" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwq:32b", - "provider_name": "NextBit", + "provider_model_id": "deepseek/deepseek-v3.1", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/int4", - "quantization": "int4", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", - "presence_penalty" + "presence_penalty", + "seed", + "top_k", + "repetition_penalty", + "tools", + "tool_choice", + "response_format", + "structured_outputs" ], - "supports_multipart": false, + "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "Qwen", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Qwen/QwQ-32B", + "hf_slug": "deepseek-ai/DeepSeek-V3.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwq", + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "Qwen: QwQ 32B", + "name": "DeepSeek: DeepSeek V3.1", "output_modalities": ["text"], - "permaslug": "qwen/qwq-32b", + "permaslug": "deepseek/deepseek-chat-v3.1", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "QwQ 32B", - "slug": "qwen/qwq-32b", + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "undi95", - "context_length": 6144, - "created_at": "2023-07-22T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-09-22T13:37:55.611452+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge", + "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 6144, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 131072, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -102306,7 +101861,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "061b3f45-8511-4ad9-8537-a5ac7aeeb163", + "id": "812bc18a-5b61-42d0-a550-f9ed8c2164c5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -102315,148 +101870,199 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "undi95", - "context_length": 4096, - "created_at": "2023-07-22T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-09-22T13:37:55.611452+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge", - "features": {}, - "group": "Llama2", + "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Undi95/ReMM-SLERP-L2-13B", + "hf_slug": "deepseek-ai/DeepSeek-V3.1-Terminus", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "ReMM SLERP 13B", + "name": "DeepSeek: DeepSeek V3.1 Terminus", "output_modalities": ["text"], - "permaslug": "undi95/remm-slerp-l2-13b", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-v3.1-terminus", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "ReMM SLERP 13B", - "slug": "undi95/remm-slerp-l2-13b", + "short_name": "DeepSeek V3.1 Terminus", + "slug": "deepseek/deepseek-v3.1-terminus", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "undi95/remm-slerp-l2-13b", - "model_variant_slug": "undi95/remm-slerp-l2-13b", + "model_variant_permaslug": "deepseek/deepseek-v3.1-terminus", + "model_variant_slug": "deepseek/deepseek-v3.1-terminus", "moderation_required": false, - "name": "NextBit | undi95/remm-slerp-l2-13b", + "name": "Novita | deepseek/deepseek-v3.1-terminus", "pricing": { - "completion": "0.00000065", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000045", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000135", + "prompt": "0.00000027" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "remm-slerp:l2-13b", - "provider_name": "NextBit", + "provider_model_id": "deepseek/deepseek-v3.1-terminus", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", - "presence_penalty" + "presence_penalty", + "seed", + "top_k", + "repetition_penalty", + "tools", + "tool_choice", + "response_format", + "structured_outputs" ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama2", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Undi95/ReMM-SLERP-L2-13B", + "hf_slug": "deepseek-ai/DeepSeek-V3.1-Terminus", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "alpaca", + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "ReMM SLERP 13B", + "name": "DeepSeek: DeepSeek V3.1 Terminus", "output_modalities": ["text"], - "permaslug": "undi95/remm-slerp-l2-13b", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-v3.1-terminus", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "ReMM SLERP 13B", - "slug": "undi95/remm-slerp-l2-13b", + "short_name": "DeepSeek V3.1 Terminus", + "slug": "deepseek/deepseek-v3.1-terminus", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "sao10k", - "context_length": 32768, - "created_at": "2024-08-28T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-12-01T13:10:42.818885+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 32768, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, + "literal_auto": false, + "literal_none": false, "literal_required": true, - "type_function": true + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "8b0ca9ca-37a1-4753-86eb-81e091a083ad", + "id": "8a5fb6d3-ef71-4958-af54-269ffa3a3c6e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -102465,135 +102071,183 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sao10k", + "author": "deepseek", "context_length": 131072, - "created_at": "2024-08-28T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "created_at": "2025-12-01T13:10:42.818885+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", - "features": {}, - "group": "Llama3", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "name": "DeepSeek: DeepSeek V3.2", "output_modalities": ["text"], - "permaslug": "sao10k/l3.1-euryale-70b", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-v3.2-20251201", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 Euryale 70B v2.2", - "slug": "sao10k/l3.1-euryale-70b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null }, - "model_variant_permaslug": "sao10k/l3.1-euryale-70b", - "model_variant_slug": "sao10k/l3.1-euryale-70b", + "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", + "model_variant_slug": "deepseek/deepseek-v3.2", "moderation_required": false, - "name": "NextBit | sao10k/l3.1-euryale-70b", + "name": "Novita | deepseek/deepseek-v3.2-20251201", "pricing": { - "completion": "0.00000075", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000065", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001345", + "prompt": "0.000000269" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "euryale:31-70b", - "provider_name": "NextBit", + "provider_model_id": "deepseek/deepseek-v3.2", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/fp8", + "provider_slug": "novita/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "response_format", - "structured_outputs" + "seed", + "top_k", + "repetition_penalty" ], - "supports_multipart": false, - "supports_reasoning": false, + "supports_multipart": true, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "name": "DeepSeek: DeepSeek V3.2", "output_modalities": ["text"], - "permaslug": "sao10k/l3.1-euryale-70b", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-v3.2-20251201", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 Euryale 70B v2.2", - "slug": "sao10k/l3.1-euryale-70b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null }, { - "author": "sao10k", - "context_length": 131072, - "created_at": "2024-12-18T15:32:08.468786+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-09-29T12:54:41.802445+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": 0.95 + }, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", + "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs.", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 131072, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { @@ -102606,7 +102260,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "fde3788e-a185-4487-93a7-10fb4412d412", + "id": "1b484aef-be8f-4016-8762-2e3caabbab9f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -102615,142 +102269,186 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sao10k", - "context_length": 8192, - "created_at": "2024-12-18T15:32:08.468786+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-09-29T12:54:41.802445+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": 0.95 + }, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).", - "features": {}, - "group": "Llama3", + "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Sao10K/L3.3-70B-Euryale-v2.3", + "hf_slug": "deepseek-ai/DeepSeek-V3.2-Exp", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "Sao10K: Llama 3.3 Euryale 70B", + "name": "DeepSeek: DeepSeek V3.2 Exp", "output_modalities": ["text"], - "permaslug": "sao10k/l3.3-euryale-70b-v2.3", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-v3.2-exp", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 Euryale 70B", - "slug": "sao10k/l3.3-euryale-70b", + "short_name": "DeepSeek V3.2 Exp", + "slug": "deepseek/deepseek-v3.2-exp", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "sao10k/l3.3-euryale-70b-v2.3", - "model_variant_slug": "sao10k/l3.3-euryale-70b", + "model_variant_permaslug": "deepseek/deepseek-v3.2-exp", + "model_variant_slug": "deepseek/deepseek-v3.2-exp", "moderation_required": false, - "name": "NextBit | sao10k/l3.3-euryale-70b-v2.3", + "name": "Novita | deepseek/deepseek-v3.2-exp", "pricing": { - "completion": "0.00000075", + "completion": "0.00000041", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000065", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "euryale:33-70b", - "provider_name": "NextBit", + "provider_model_id": "deepseek/deepseek-v3.2-exp", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", + "seed", + "top_k", + "repetition_penalty", "response_format", - "structured_outputs" + "structured_outputs", + "tools", + "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "Sao10K/L3.3-70B-Euryale-v2.3", + "hf_slug": "deepseek-ai/DeepSeek-V3.2-Exp", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "deepseek-v3.1", "model_version_group_id": null, - "name": "Sao10K: Llama 3.3 Euryale 70B", + "name": "DeepSeek: DeepSeek V3.2 Exp", "output_modalities": ["text"], - "permaslug": "sao10k/l3.3-euryale-70b-v2.3", - "reasoning_config": null, + "permaslug": "deepseek/deepseek-v3.2-exp", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 Euryale 70B", - "slug": "sao10k/l3.3-euryale-70b", + "short_name": "DeepSeek V3.2 Exp", + "slug": "deepseek/deepseek-v3.2-exp", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "thedrummer", - "context_length": 131072, - "created_at": "2025-09-27T00:11:18.116138+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 64000, + "created_at": "2025-01-20T13:51:35.96912+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", + "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 131072, + "adapter_name": "NovitaAdapter", + "can_abort": true, + "context_length": 64000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -102759,8 +102457,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "9d5db8f1-2899-4304-8f79-09212049ad13", + "has_completions": false, + "id": "cca51d1b-db1d-44d2-bc90-268311214746", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -102769,482 +102467,169 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "thedrummer", - "context_length": 131072, - "created_at": "2025-09-27T00:11:18.116138+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-01-20T13:51:35.96912+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", + "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", "features": { "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "Other", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "thedrummer/cydonia-24b-v4.1", + "hf_slug": "deepseek-ai/DeepSeek-R1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "TheDrummer: Cydonia 24B V4.1", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1", "output_modalities": ["text"], - "permaslug": "thedrummer/cydonia-24b-v4.1", + "permaslug": "deepseek/deepseek-r1", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Cydonia 24B V4.1", - "slug": "thedrummer/cydonia-24b-v4.1", + "short_name": "R1", + "slug": "deepseek/deepseek-r1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "thedrummer/cydonia-24b-v4.1", - "model_variant_slug": "thedrummer/cydonia-24b-v4.1", + "model_variant_permaslug": "deepseek/deepseek-r1", + "model_variant_slug": "deepseek/deepseek-r1", "moderation_required": false, - "name": "NextBit | thedrummer/cydonia-24b-v4.1", + "name": "Novita | deepseek/deepseek-r1", "pricing": { - "completion": "0.0000005", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000007" }, - "provider_display_name": "NextBit", + "provider_display_name": "NovitaAI", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", + "adapterName": "NovitaAdapter", + "baseUrl": "https://api.novita.ai/v3/openai", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", + "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", + "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", "training": false }, - "displayName": "NextBit", + "displayName": "NovitaAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, + "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, + "ignoredProviderModels": [ + "google/gemma-3-1b-it", + "baichuan/baichuan-m2-32b", + "baidu/ernie-4.5-0.3b", + "qwen/qwen-mt-plus", + "qwen/qwen3-4b-fp8", + "meta-llama/llama-3.2-1b-instruct", + "sophosympatheia/midnight-rose-70b", + "deepseek/deepseek-prover-v2-671b", + "Sao10K/L3-8B-Stheno-v3.2", + "thudm/glm-4-32b-0414", + "qwen/qwen3-omni-30b-a3b-thinking", + "qwen/qwen3-omni-30b-a3b-instruct", + "paddlepaddle/paddleocr-vl", + "deepseek/deepseek-ocr", + "skywork/r1v4-lite", + "baidu/ernie-4.5-vl-28b-a3b-thinking", + "zai-org/autoglm-phone-9b-multilingual" + ], + "isAbortable": true, + "isMultipartSupported": true, "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null + "name": "Novita", + "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], + "slug": "novita", + "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "cydonia:24b", - "provider_name": "NextBit", + "provider_model_id": "deepseek/deepseek-r1-turbo", + "provider_name": "Novita", "provider_region": null, - "provider_slug": "nextbit/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "response_format", - "structured_outputs" + "seed", + "top_k", + "repetition_penalty", + "tools", + "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "Other", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "thedrummer/cydonia-24b-v4.1", + "hf_slug": "deepseek-ai/DeepSeek-R1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "TheDrummer: Cydonia 24B V4.1", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1", "output_modalities": ["text"], - "permaslug": "thedrummer/cydonia-24b-v4.1", + "permaslug": "deepseek/deepseek-r1", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Cydonia 24B V4.1", - "slug": "thedrummer/cydonia-24b-v4.1", + "short_name": "R1", + "slug": "deepseek/deepseek-r1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "thedrummer", - "context_length": 32768, - "created_at": "2024-09-30T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], - "default_system": null, - "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", - "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 32768, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", - "training": false - }, - "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": true, - "id": "94c3af54-1ed5-400c-affb-570e2935d725", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": null, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "thedrummer", - "context_length": 32768, - "created_at": "2024-09-30T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], - "default_system": null, - "description": "Rocinante 12B is designed for engaging storytelling and rich prose.\n\nEarly testers have reported:\n- Expanded vocabulary with unique and expressive word choices\n- Enhanced creativity for vivid narratives\n- Adventure-filled and captivating stories", - "features": {}, - "group": "Qwen", - "has_text_output": true, - "hf_slug": "TheDrummer/Rocinante-12B-v1.1", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", - "model_version_group_id": null, - "name": "TheDrummer: Rocinante 12B", - "output_modalities": ["text"], - "permaslug": "thedrummer/rocinante-12b", - "reasoning_config": null, - "router": null, - "short_name": "Rocinante 12B", - "slug": "thedrummer/rocinante-12b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "thedrummer/rocinante-12b", - "model_variant_slug": "thedrummer/rocinante-12b", - "moderation_required": false, - "name": "NextBit | thedrummer/rocinante-12b", - "pricing": { - "completion": "0.00000043", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000017", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "NextBit", - "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", - "training": false - }, - "displayName": "NextBit", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": true, - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" - }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, - "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null - }, - "provider_model_id": "rocinante:12b", - "provider_name": "NextBit", - "provider_region": null, - "provider_slug": "nextbit/bf16", - "quantization": "bf16", - "supported_parameters": [ - "structured_outputs", - "response_format", - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "tools", - "tool_choice" - ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": {}, - "group": "Qwen", - "has_text_output": true, - "hf_slug": "TheDrummer/Rocinante-12B-v1.1", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", - "model_version_group_id": null, - "name": "TheDrummer: Rocinante 12B", - "output_modalities": ["text"], - "permaslug": "thedrummer/rocinante-12b", - "reasoning_config": null, - "router": null, - "short_name": "Rocinante 12B", - "slug": "thedrummer/rocinante-12b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "thedrummer", - "context_length": 32768, - "created_at": "2024-11-08T22:04:08.359811+00:00", - "default_parameters": {}, - "default_stops": ["[INST]", ""], - "default_system": null, - "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", - "endpoint": { - "adapter_name": "OpenAIAdapter", - "can_abort": false, - "context_length": 32768, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", - "training": false - }, - "features": { - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": true, - "id": "08cb2fef-fad6-4a41-b292-b2d79f782a08", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": null, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "thedrummer", - "context_length": 32000, - "created_at": "2024-11-08T22:04:08.359811+00:00", - "default_parameters": {}, - "default_stops": ["[INST]", ""], - "default_system": null, - "description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.", - "features": {}, - "group": "Mistral", - "has_text_output": true, - "hf_slug": "TheDrummer/UnslopNemo-12B-v4.1", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": null, - "name": "TheDrummer: UnslopNemo 12B", - "output_modalities": ["text"], - "permaslug": "thedrummer/unslopnemo-12b", - "reasoning_config": null, - "router": null, - "short_name": "UnslopNemo 12B", - "slug": "thedrummer/unslopnemo-12b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "thedrummer/unslopnemo-12b", - "model_variant_slug": "thedrummer/unslopnemo-12b", - "moderation_required": false, - "name": "NextBit | thedrummer/unslopnemo-12b", - "pricing": { - "completion": "0.0000004", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "NextBit", - "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://api.nextbit256.com/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://www.nextbit256.com/docs/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.nextbit256.com/docs/terms-of-service", - "training": false - }, - "displayName": "NextBit", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": true, - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://nextbit256.com/&size=256" - }, - "ignoredProviderModels": [], - "isAbortable": false, - "isMultipartSupported": false, - "moderationRequired": false, - "name": "NextBit", - "owners": ["{}"], - "slug": "nextbit", - "statusPageUrl": null - }, - "provider_model_id": "unslopnemo:12b", - "provider_name": "NextBit", - "provider_region": null, - "provider_slug": "nextbit/fp8", - "quantization": "fp8", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "response_format", - "structured_outputs", - "tools", - "tool_choice" - ], - "supports_multipart": false, - "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": {}, - "group": "Mistral", - "has_text_output": true, - "hf_slug": "TheDrummer/UnslopNemo-12B-v4.1", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": null, - "name": "TheDrummer: UnslopNemo 12B", - "output_modalities": ["text"], - "permaslug": "thedrummer/unslopnemo-12b", - "reasoning_config": null, - "router": null, - "short_name": "UnslopNemo 12B", - "slug": "thedrummer/unslopnemo-12b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - } - ], - "name": "NextBit", - "slug": "nextbit" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "NovitaAI", - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" - }, - "models": [ - { - "author": "baidu", - "context_length": 120000, - "created_at": "2025-08-12T21:29:27.753118+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.8 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 120000, + "context_length": 163840, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -103253,11 +102638,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -103267,7 +102648,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "83e51832-4cf9-4bcb-ad50-10e150d48b86", + "id": "04c4f1eb-bb42-42b6-823b-107ae08cf7a2", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -103276,63 +102657,60 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "baidu", - "context_length": 131072, - "created_at": "2025-08-12T21:29:27.753118+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.8 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an extensive 131K token context length, the model achieves efficient inference via multi-expert parallel collaboration and quantization, while advanced post-training techniques including SFT, DPO, and UPO ensure optimized performance across diverse applications with specialized routing and balancing losses for superior task handling.", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Other", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-21B-A3B-PT", + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 21B A3B", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-21b-a3b", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "ERNIE 4.5 21B A3B", - "slug": "baidu/ernie-4.5-21b-a3b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, - "model_variant_permaslug": "baidu/ernie-4.5-21b-a3b", - "model_variant_slug": "baidu/ernie-4.5-21b-a3b", + "model_variant_permaslug": "deepseek/deepseek-r1-0528", + "model_variant_slug": "deepseek/deepseek-r1-0528", "moderation_required": false, - "name": "Novita | baidu/ernie-4.5-21b-a3b", + "name": "Novita | deepseek/deepseek-r1-0528", "pricing": { - "completion": "0.00000028", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000007", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000035", + "prompt": "0.0000007" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -103381,12 +102759,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "baidu/ernie-4.5-21B-a3b", + "provider_model_id": "deepseek/deepseek-r1-0528", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -103397,59 +102777,58 @@ "top_k", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Other", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-21B-A3B-PT", + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 21B A3B", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-21b-a3b", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "ERNIE 4.5 21B A3B", - "slug": "baidu/ernie-4.5-21b-a3b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, { - "author": "baidu", - "context_length": 131072, - "created_at": "2025-10-09T22:28:07.216653+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 - }, - "default_stops": [], + "author": "deepseek", + "context_length": 8192, + "created_at": "2025-01-23T20:12:49.780212+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -103458,11 +102837,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -103470,9 +102844,9 @@ "type_function": true } }, - "has_chat_completions": true, - "has_completions": true, - "id": "af4c86e1-0b7f-4a10-a655-eed49e3a1054", + "has_chat_completions": false, + "has_completions": false, + "id": "797ebc09-6e53-4795-9d9e-2669ce7aee03", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -103481,63 +102855,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "baidu", - "context_length": 131072, - "created_at": "2025-10-09T22:28:07.216653+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 - }, - "default_stops": [], + "author": "deepseek", + "context_length": 128000, + "created_at": "2025-01-23T20:12:49.780212+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "features": { "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Llama3", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-21B-A3B-Thinking", + "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 21B A3B Thinking", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1 Distill Llama 70B", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-21b-a3b-thinking", + "permaslug": "deepseek/deepseek-r1-distill-llama-70b", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "ERNIE 4.5 21B A3B Thinking", - "slug": "baidu/ernie-4.5-21b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "R1 Distill Llama 70B", + "slug": "deepseek/deepseek-r1-distill-llama-70b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "baidu/ernie-4.5-21b-a3b-thinking", - "model_variant_slug": "baidu/ernie-4.5-21b-a3b-thinking", + "model_variant_permaslug": "deepseek/deepseek-r1-distill-llama-70b", + "model_variant_slug": "deepseek/deepseek-r1-distill-llama-70b", "moderation_required": false, - "name": "Novita | baidu/ernie-4.5-21b-a3b-thinking", + "name": "Novita | deepseek/deepseek-r1-distill-llama-70b", "pricing": { - "completion": "0.00000028", + "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000007", - "request": "0", - "web_search": "0" + "prompt": "0.0000008" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -103586,11 +102949,11 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "baidu/ernie-4.5-21B-a3b-thinking", + "provider_model_id": "deepseek/deepseek-r1-distill-llama-70b", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ "reasoning", "include_reasoning", @@ -103602,7 +102965,9 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -103613,44 +102978,46 @@ "features": { "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Llama3", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-21B-A3B-Thinking", + "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 21B A3B Thinking", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1 Distill Llama 70B", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-21b-a3b-thinking", + "permaslug": "deepseek/deepseek-r1-distill-llama-70b", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "ERNIE 4.5 21B A3B Thinking", - "slug": "baidu/ernie-4.5-21b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "R1 Distill Llama 70B", + "slug": "deepseek/deepseek-r1-distill-llama-70b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "baidu", - "context_length": 123000, - "created_at": "2025-06-30T16:15:39.588489+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "google", + "context_length": 98304, + "created_at": "2025-03-12T05:12:39.645813+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["", "", ""], "default_system": null, - "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 123000, + "context_length": 98304, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -103659,7 +103026,10 @@ "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -103669,7 +103039,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0ca3fe58-73fa-4687-a6b4-73c0657fce71", + "id": "cf530f36-7cb8-4eb9-8953-880eef952af3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -103678,59 +103048,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 12000, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "baidu", + "author": "google", "context_length": 131072, - "created_at": "2025-06-30T16:15:39.588489+00:00", - "default_parameters": {}, - "default_stops": [], + "created_at": "2025-03-12T05:12:39.645813+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["", "", ""], "default_system": null, - "description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in both English and Chinese. Optimized for high-throughput inference and efficient scaling, it uses a heterogeneous MoE structure with advanced routing and quantization strategies, including FP8 and 2-bit formats. This version is fine-tuned for language-only tasks and supports reasoning, tool parameters, and extended context lengths up to 131k tokens. Suitable for general-purpose LLM applications with high reasoning and throughput demands.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-300B-A47B-PT", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 300B A47B ", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-300b-a47b", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "ERNIE 4.5 300B A47B ", - "slug": "baidu/ernie-4.5-300b-a47b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, - "model_variant_permaslug": "baidu/ernie-4.5-300b-a47b", - "model_variant_slug": "baidu/ernie-4.5-300b-a47b", + "model_variant_permaslug": "google/gemma-3-27b-it", + "model_variant_slug": "google/gemma-3-27b-it", "moderation_required": false, - "name": "Novita | baidu/ernie-4.5-300b-a47b", + "name": "Novita | google/gemma-3-27b-it", "pricing": { - "completion": "0.0000011", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000028", - "request": "0", - "web_search": "0" + "prompt": "0.000000119" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -103779,7 +103149,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "baidu/ernie-4.5-300b-a47b-paddle", + "provider_model_id": "google/gemma-3-27b-it", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/bf16", @@ -103793,9 +103163,7 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty", - "response_format", - "structured_outputs" + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, @@ -103804,46 +103172,51 @@ "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-300B-A47B-PT", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 300B A47B ", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-300b-a47b", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "ERNIE 4.5 300B A47B ", - "slug": "baidu/ernie-4.5-300b-a47b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, { - "author": "baidu", - "context_length": 30000, - "created_at": "2025-08-12T21:07:16.565993+00:00", - "default_parameters": {}, + "author": "kwaipilot", + "context_length": 256000, + "created_at": "2025-11-10T03:38:32.123517+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.", + "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. \n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 30000, + "context_length": 256000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -103852,8 +103225,6 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -103863,7 +103234,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "75e64609-6b51-4e35-96ea-5065c6fbda63", + "id": "0394900c-1d75-40e9-90ce-e0b2366143da", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -103872,18 +103243,25 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "baidu", - "context_length": 131072, - "created_at": "2025-08-12T21:07:16.565993+00:00", - "default_parameters": {}, + "author": "kwaipilot", + "context_length": 262144, + "created_at": "2025-11-10T03:38:32.123517+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing. Built with scaling-efficient infrastructure for high-throughput training and inference, the model leverages advanced post-training techniques including SFT, DPO, and UPO for optimized performance, while supporting an impressive 131K context length and RLVR alignment for superior cross-modal reasoning and generation capabilities.", + "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. \n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, @@ -103892,39 +103270,35 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-VL-28B-A3B-PT", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 VL 28B A3B", + "name": "Kwaipilot: KAT-Coder-Pro V1", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-vl-28b-a3b", + "permaslug": "kwaipilot/kat-coder-pro-v1", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "ERNIE 4.5 VL 28B A3B", - "slug": "baidu/ernie-4.5-vl-28b-a3b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null + "short_name": "KAT-Coder-Pro V1", + "slug": "kwaipilot/kat-coder-pro", + "updated_at": "2026-01-11T23:46:45.088471+00:00", + "warning_message": "The free endpoint has reduced rate limits from January 5-12, after which it will be deprecated. Use [kwaipilot/kat-coder-pro-v1](https://openrouter.ai/kwaipilot/kat-coder-pro) for continued access." }, - "model_variant_permaslug": "baidu/ernie-4.5-vl-28b-a3b", - "model_variant_slug": "baidu/ernie-4.5-vl-28b-a3b", + "model_variant_permaslug": "kwaipilot/kat-coder-pro-v1", + "model_variant_slug": "kwaipilot/kat-coder-pro", "moderation_required": false, - "name": "Novita | baidu/ernie-4.5-vl-28b-a3b", + "name": "Novita | kwaipilot/kat-coder-pro-v1", "pricing": { - "completion": "0.00000056", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000014", - "request": "0", - "web_search": "0" + "completion": "0.000000828", + "discount": 0.31, + "input_cache_read": "0.0000000414", + "prompt": "0.000000207" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -103973,14 +103347,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "baidu/ernie-4.5-vl-28b-a3b", + "provider_model_id": "kwaipilot/kat-coder-pro", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp16", "quantization": "fp16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -103991,15 +103363,20 @@ "top_k", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, @@ -104008,38 +103385,38 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-VL-28B-A3B-PT", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 VL 28B A3B", + "name": "Kwaipilot: KAT-Coder-Pro V1", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-vl-28b-a3b", + "permaslug": "kwaipilot/kat-coder-pro-v1", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "ERNIE 4.5 VL 28B A3B", - "slug": "baidu/ernie-4.5-vl-28b-a3b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null + "short_name": "KAT-Coder-Pro V1", + "slug": "kwaipilot/kat-coder-pro", + "updated_at": "2026-01-11T23:46:45.088471+00:00", + "warning_message": "The free endpoint has reduced rate limits from January 5-12, after which it will be deprecated. Use [kwaipilot/kat-coder-pro-v1](https://openrouter.ai/kwaipilot/kat-coder-pro) for continued access." }, { - "author": "baidu", - "context_length": 123000, - "created_at": "2025-06-30T16:28:23.022047+00:00", + "author": "meta-llama", + "context_length": 8192, + "created_at": "2024-04-18T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 123000, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -104048,11 +103425,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -104062,7 +103434,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "1fe59382-263d-4107-a768-d7d2007bd3b6", + "id": "5a9af169-99c1-4922-ad8f-06c4b22b8817", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -104071,59 +103443,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16000, + "max_completion_tokens": 8000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "baidu", - "context_length": 131072, - "created_at": "2025-06-30T16:28:23.022047+00:00", + "author": "meta-llama", + "context_length": 8192, + "created_at": "2024-04-18T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data using a heterogeneous MoE architecture and modality-isolated routing to enable high-fidelity cross-modal reasoning, image understanding, and long-context generation (up to 131k tokens). Fine-tuned with techniques like SFT, DPO, UPO, and RLVR, this model supports both “thinking” and non-thinking inference modes. Designed for vision-language tasks in English and Chinese, it is optimized for efficient scaling and can operate under 4-bit/8-bit quantization.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-VL-424B-A47B-PT", + "hf_slug": "meta-llama/Meta-Llama-3-70B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 VL 424B A47B ", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3 70B Instruct", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-vl-424b-a47b", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "ERNIE 4.5 VL 424B A47B ", - "slug": "baidu/ernie-4.5-vl-424b-a47b", + "short_name": "Llama 3 70B Instruct", + "slug": "meta-llama/llama-3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "baidu/ernie-4.5-vl-424b-a47b", - "model_variant_slug": "baidu/ernie-4.5-vl-424b-a47b", + "model_variant_permaslug": "meta-llama/llama-3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3-70b-instruct", "moderation_required": false, - "name": "Novita | baidu/ernie-4.5-vl-424b-a47b", + "name": "Novita | meta-llama/llama-3-70b-instruct", "pricing": { - "completion": "0.00000125", + "completion": "0.00000074", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000042", - "request": "0", - "web_search": "0" + "prompt": "0.00000051" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -104172,14 +103529,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "baidu/ernie-4.5-vl-424b-a47b", + "provider_model_id": "meta-llama/llama-3-70b-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp16", - "quantization": "fp16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -104188,59 +103543,47 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "baidu/ERNIE-4.5-VL-424B-A47B-PT", + "hf_slug": "meta-llama/Meta-Llama-3-70B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Baidu: ERNIE 4.5 VL 424B A47B ", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3 70B Instruct", "output_modalities": ["text"], - "permaslug": "baidu/ernie-4.5-vl-424b-a47b", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "ERNIE 4.5 VL 424B A47B ", - "slug": "baidu/ernie-4.5-vl-424b-a47b", + "short_name": "Llama 3 70B Instruct", + "slug": "meta-llama/llama-3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 128000, - "created_at": "2025-05-29T17:09:03.280319+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "meta-llama", + "context_length": 8192, + "created_at": "2024-04-18T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, pushing its reasoning and inference to the brink of flagship models like O3 and Gemini 2.5 Pro.\nIt now tops math, programming, and logic leaderboards, showcasing a step-change in depth-of-thought.\nThe distilled variant, DeepSeek-R1-0528-Qwen3-8B, transfers this chain-of-thought into an 8 B-parameter form, beating standard Qwen3 8B by +10 pp and tying the 235 B “thinking” giant on AIME 2024.", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -104258,7 +103601,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "ffe306d9-970d-48c6-915a-1a4144c406c4", + "id": "425a0bee-ae8c-476b-837a-df27836d0f9d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -104267,63 +103610,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32000, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-05-29T17:09:03.280319+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "meta-llama", + "context_length": 8192, + "created_at": "2024-04-18T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek-R1-0528 is a lightly upgraded release of DeepSeek R1 that taps more compute and smarter post-training tricks, pushing its reasoning and inference to the brink of flagship models like O3 and Gemini 2.5 Pro.\nIt now tops math, programming, and logic leaderboards, showcasing a step-change in depth-of-thought.\nThe distilled variant, DeepSeek-R1-0528-Qwen3-8B, transfers this chain-of-thought into an 8 B-parameter form, beating standard Qwen3 8B by +10 pp and tying the 235 B “thinking” giant on AIME 2024.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/deepseek-r1-0528-qwen3-8b", + "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek R1 0528 Qwen3 8B", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3 8B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528-qwen3-8b", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "DeepSeek R1 0528 Qwen3 8B", - "slug": "deepseek/deepseek-r1-0528-qwen3-8b", + "short_name": "Llama 3 8B Instruct", + "slug": "meta-llama/llama-3-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-0528-qwen3-8b", - "model_variant_slug": "deepseek/deepseek-r1-0528-qwen3-8b", + "model_variant_permaslug": "meta-llama/llama-3-8b-instruct", + "model_variant_slug": "meta-llama/llama-3-8b-instruct", "moderation_required": false, - "name": "Novita | deepseek/deepseek-r1-0528-qwen3-8b", + "name": "Novita | meta-llama/llama-3-8b-instruct", "pricing": { - "completion": "0.00000009", + "completion": "0.00000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -104372,14 +103696,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-r1-0528-qwen3-8b", + "provider_model_id": "meta-llama/llama-3-8b-instruct", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/bf16", "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -104391,52 +103713,42 @@ "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/deepseek-r1-0528-qwen3-8b", + "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek R1 0528 Qwen3 8B", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3 8B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528-qwen3-8b", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "DeepSeek R1 0528 Qwen3 8B", - "slug": "deepseek/deepseek-r1-0528-qwen3-8b", + "short_name": "Llama 3 8B Instruct", + "slug": "meta-llama/llama-3-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek-ai", - "context_length": 64000, - "created_at": "2024-12-26T19:28:40.559917+00:00", + "author": "meta-llama", + "context_length": 16384, + "created_at": "2024-07-23T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 64000, + "context_length": 16384, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -104445,7 +103757,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -104455,7 +103766,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cefb5a1e-9fea-4496-9a64-4e1b11a7cc8c", + "id": "baecce7a-5028-432e-aaa8-d78b5b700929", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -104464,49 +103775,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16000, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek-ai", + "author": "meta-llama", "context_length": 131072, - "created_at": "2024-12-26T19:28:40.559917+00:00", + "created_at": "2024-07-23T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations reveal that the model outperforms other open-source models and rivals leading closed-source models.\n\nFor model details, please visit [the DeepSeek-V3 repo](https://github.com/deepseek-ai/DeepSeek-V3) for more information, or see the [launch announcement](https://api-docs.deepseek.com/news/news1226).", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "features": {}, - "group": "DeepSeek", + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3", + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3", + "permaslug": "meta-llama/llama-3.1-8b-instruct", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3", - "slug": "deepseek/deepseek-chat", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3", - "model_variant_slug": "deepseek/deepseek-chat", + "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", "moderation_required": false, - "name": "Novita | deepseek/deepseek-chat-v3", + "name": "Novita | meta-llama/llama-3.1-8b-instruct", "pricing": { - "completion": "0.0000013", + "completion": "0.00000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000002" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -104555,7 +103861,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-v3-turbo", + "provider_model_id": "meta-llama/llama-3.1-8b-instruct", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", @@ -104569,47 +103875,45 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty", - "tools", - "tool_choice" + "repetition_penalty" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "DeepSeek", + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3", + "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3.1 8B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3", + "permaslug": "meta-llama/llama-3.1-8b-instruct", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3", - "slug": "deepseek/deepseek-chat", + "short_name": "Llama 3.1 8B Instruct", + "slug": "meta-llama/llama-3.1-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -104628,7 +103932,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3a816eb8-7a0b-4a9a-b497-e3a1efb349fc", + "id": "8f74ff86-f435-4f1b-aea0-f7869c7eaa51", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -104637,50 +103941,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 163840, + "max_completion_tokens": 120000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", + "author": "meta-llama", "context_length": 131072, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "features": {}, - "group": "DeepSeek", + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", + "permaslug": "meta-llama/llama-3.3-70b-instruct", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", - "model_variant_slug": "deepseek/deepseek-chat-v3-0324", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "Novita | deepseek/deepseek-chat-v3-0324", + "name": "Novita | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.00000112", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000135", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.000000135" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -104729,11 +104027,11 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-v3-0324", + "provider_model_id": "meta-llama/llama-3.3-70b-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", @@ -104745,9 +104043,7 @@ "top_k", "repetition_penalty", "tools", - "tool_choice", - "response_format", - "structured_outputs" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, @@ -104756,36 +104052,36 @@ "variant": "standard" }, "features": {}, - "group": "DeepSeek", + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", + "permaslug": "meta-llama/llama-3.3-70b-instruct", "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-08-21T12:33:48+00:00", + "author": "meta-llama", + "context_length": 1048576, + "created_at": "2025-04-05T19:37:02.129674+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -104795,79 +104091,62 @@ }, "features": { "supported_parameters": {}, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, - "type_function": true + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "715d05e1-9fa7-4ceb-834a-833f5e26a55a", + "id": "52093e97-a003-4dfa-b247-53ead8bb300c", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 200, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 8192, "max_prompt_tokens": null, - "max_tokens_per_image": null, + "max_tokens_per_image": 3342, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-08-21T12:33:48+00:00", + "author": "meta-llama", + "context_length": 1048576, + "created_at": "2025-04-05T19:37:02.129674+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", - "model_variant_slug": "deepseek/deepseek-chat-v3.1", + "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "model_variant_slug": "meta-llama/llama-4-maverick", "moderation_required": false, - "name": "Novita | deepseek/deepseek-chat-v3.1", + "name": "Novita | meta-llama/llama-4-maverick-17b-128e-instruct", "pricing": { - "completion": "0.000001", + "completion": "0.00000085", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000135", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -104916,14 +104195,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-v3.1", + "provider_model_id": "meta-llama/llama-4-maverick-17b-128e-instruct-fp8", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -104932,59 +104209,41 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty", - "tools", - "tool_choice", - "response_format", - "structured_outputs" + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", + "author": "meta-llama", "context_length": 131072, - "created_at": "2025-09-22T13:37:55.611452+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "created_at": "2025-04-05T19:31:59.735804+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, @@ -104997,6 +104256,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -105006,7 +104266,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "812bc18a-5b61-42d0-a550-f9ed8c2164c5", + "id": "b0352c4a-a51c-4ee0-9f5e-cfa527c3a208", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -105015,64 +104275,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-09-22T13:37:55.611452+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "meta-llama", + "context_length": 10000000, + "created_at": "2025-04-05T19:31:59.735804+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's performance in coding and search agents. It is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. ", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1-Terminus", + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1 Terminus", + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.1-terminus", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3.1 Terminus", - "slug": "deepseek/deepseek-v3.1-terminus", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.1-terminus", - "model_variant_slug": "deepseek/deepseek-v3.1-terminus", + "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "model_variant_slug": "meta-llama/llama-4-scout", "moderation_required": false, - "name": "Novita | deepseek/deepseek-v3.1-terminus", + "name": "Novita | meta-llama/llama-4-scout-17b-16e-instruct", "pricing": { - "completion": "0.000001", + "completion": "0.00000059", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000135", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.00000018" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -105121,14 +104361,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-v3.1-terminus", + "provider_model_id": "meta-llama/llama-4-scout-17b-16e-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -105137,63 +104375,45 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty", - "tools", - "tool_choice", - "response_format", - "structured_outputs" + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1-Terminus", + "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1 Terminus", + "name": "Meta: Llama 4 Scout", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.1-terminus", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "reasoning_config": null, "router": null, - "short_name": "DeepSeek V3.1 Terminus", - "slug": "deepseek/deepseek-v3.1-terminus", + "short_name": "Llama 4 Scout", + "slug": "meta-llama/llama-4-scout", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-12-01T13:10:42.818885+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, + "author": "minimax", + "context_length": 1000000, + "created_at": "2025-06-17T22:46:54.257159+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 1000000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -105202,17 +104422,19 @@ "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": { + "structured_outputs": false + }, "supports_tool_choice": { - "literal_auto": false, - "literal_none": false, + "literal_auto": true, + "literal_none": true, "literal_required": true, - "type_function": false + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "8a5fb6d3-ef71-4958-af54-269ffa3a3c6e", + "id": "aaecb87e-e042-4c74-836b-936baab09de1", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -105221,65 +104443,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 40000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-12-01T13:10:42.818885+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, + "author": "minimax", + "context_length": 1000000, + "created_at": "2025-06-17T22:46:54.257159+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "MiniMax: MiniMax M1", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "minimax/minimax-m1", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "MiniMax M1", + "slug": "minimax/minimax-m1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", - "model_variant_slug": "deepseek/deepseek-v3.2", + "model_variant_permaslug": "minimax/minimax-m1", + "model_variant_slug": "minimax/minimax-m1", "moderation_required": false, - "name": "Novita | deepseek/deepseek-v3.2-20251201", + "name": "Novita | minimax/minimax-m1", "pricing": { - "completion": "0.0000004", + "completion": "0.00000176", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000001345", - "internal_reasoning": "0", - "prompt": "0.000000269", - "request": "0", - "web_search": "0" + "prompt": "0.00000044" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -105328,11 +104537,11 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-v3.2", + "provider_model_id": "minimaxai/minimax-m1-80k", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ "reasoning", "include_reasoning", @@ -105344,60 +104553,59 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "MiniMax: MiniMax M1", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "minimax/minimax-m1", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "MiniMax M1", + "slug": "minimax/minimax-m1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-09-29T12:54:41.802445+00:00", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-10-23T20:41:33.120854+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, + "temperature": 1, "top_p": 0.95 }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs.", + "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 204800, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -105406,6 +104614,9 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "content-string", + "should_send_reasoning_text_in_text_content": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -105415,7 +104626,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "1b484aef-be8f-4016-8762-2e3caabbab9f", + "id": "09da61cf-71a6-4717-9ebd-857e7203e3b4", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -105424,64 +104635,62 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-09-29T12:54:41.802445+00:00", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-10-23T20:41:33.120854+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, + "temperature": 1, "top_p": 0.95 }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism designed to improve training and inference efficiency in long-context scenarios while maintaining output quality. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model was trained under conditions aligned with V3.1-Terminus to enable direct comparison. Benchmarking shows performance roughly on par with V3.1 across reasoning, coding, and agentic tool-use tasks, with minor tradeoffs and gains depending on the domain. This release focuses on validating architectural optimizations for extended context lengths rather than advancing raw task accuracy, making it primarily a research-oriented model for exploring efficient transformer designs.", + "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2-Exp", + "hf_slug": "MiniMaxAI/MiniMax-M2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2 Exp", + "name": "MiniMax: MiniMax M2", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-exp", + "permaslug": "minimax/minimax-m2", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2 Exp", - "slug": "deepseek/deepseek-v3.2-exp", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2", + "slug": "minimax/minimax-m2", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.2-exp", - "model_variant_slug": "deepseek/deepseek-v3.2-exp", + "model_variant_permaslug": "minimax/minimax-m2", + "model_variant_slug": "minimax/minimax-m2", "moderation_required": false, - "name": "Novita | deepseek/deepseek-v3.2-exp", + "name": "Novita | minimax/minimax-m2", "pricing": { - "completion": "0.00000041", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000003", + "prompt": "0.0000003" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -105530,7 +104739,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-v3.2-exp", + "provider_model_id": "minimax/minimax-m2", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", @@ -105547,8 +104756,6 @@ "seed", "top_k", "repetition_penalty", - "response_format", - "structured_outputs", "tools", "tool_choice" ], @@ -105559,47 +104766,53 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2-Exp", + "hf_slug": "MiniMaxAI/MiniMax-M2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2 Exp", + "name": "MiniMax: MiniMax M2", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-exp", + "permaslug": "minimax/minimax-m2", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2 Exp", - "slug": "deepseek/deepseek-v3.2-exp", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2", + "slug": "minimax/minimax-m2", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 64000, - "created_at": "2025-01-20T13:51:35.96912+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], - "default_system": null, - "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.9 + }, + "default_stops": [], + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 64000, + "context_length": 204800, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -105608,17 +104821,18 @@ "training": false }, "features": { - "supported_parameters": {}, + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": false, - "id": "cca51d1b-db1d-44d2-bc90-268311214746", + "has_completions": true, + "id": "a300fcbb-ec0e-4c1f-a4d4-d06dc8c19cf9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -105627,57 +104841,62 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16000, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-01-20T13:51:35.96912+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], - "default_system": null, - "description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model & [technical report](https://api-docs.deepseek.com/news/news250120).\n\nMIT licensed: Distill & commercialize freely!", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.9 + }, + "default_stops": [], + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1", + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1", + "instruct_type": null, + "model_version_group_id": null, + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1", + "permaslug": "minimax/minimax-m2.1", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "R1", - "slug": "deepseek/deepseek-r1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1", - "model_variant_slug": "deepseek/deepseek-r1", + "model_variant_permaslug": "minimax/minimax-m2.1", + "model_variant_slug": "minimax/minimax-m2.1", "moderation_required": false, - "name": "Novita | deepseek/deepseek-r1", + "name": "Novita | minimax/minimax-m2.1", "pricing": { - "completion": "0.0000025", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000007", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000003", + "prompt": "0.0000003" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -105726,7 +104945,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-r1-turbo", + "provider_model_id": "minimax/minimax-m2.1", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", @@ -105744,7 +104963,8 @@ "top_k", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, @@ -105753,48 +104973,51 @@ "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1", + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1", + "instruct_type": null, + "model_version_group_id": null, + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1", + "permaslug": "minimax/minimax-m2.1", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "R1", - "slug": "deepseek/deepseek-r1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", + "author": "mistralai", + "context_length": 60288, + "created_at": "2024-07-19T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.3 }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["[INST]", ""], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 60288, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -105813,7 +105036,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "04c4f1eb-bb42-42b6-823b-107ae08cf7a2", + "id": "b61351b1-7c99-4bab-a6c8-87cb4807cc66", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -105822,65 +105045,46 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 16000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", + "author": "mistralai", + "context_length": 131072, + "created_at": "2024-07-19T00:00:00+00:00", "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.3 }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["[INST]", ""], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", + "instruct_type": "mistral", "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "name": "Mistral: Mistral Nemo", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "mistralai/mistral-nemo", + "reasoning_config": null, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Mistral Nemo", + "slug": "mistralai/mistral-nemo", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-0528", - "model_variant_slug": "deepseek/deepseek-r1-0528", + "model_variant_permaslug": "mistralai/mistral-nemo", + "model_variant_slug": "mistralai/mistral-nemo", "moderation_required": false, - "name": "Novita | deepseek/deepseek-r1-0528", + "name": "Novita | mistralai/mistral-nemo", "pricing": { - "completion": "0.0000025", + "completion": "0.00000017", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000035", - "internal_reasoning": "0", - "prompt": "0.0000007", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -105929,14 +105133,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-r1-0528", + "provider_model_id": "mistralai/mistral-nemo", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -105946,59 +105148,46 @@ "seed", "top_k", "repetition_penalty", - "tools", - "tool_choice", "response_format", "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", + "instruct_type": "mistral", "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "name": "Mistral: Mistral Nemo", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "mistralai/mistral-nemo", + "reasoning_config": null, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Mistral Nemo", + "slug": "mistralai/mistral-nemo", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 8192, - "created_at": "2025-01-23T20:12:49.780212+00:00", + "author": "moonshotai", + "context_length": 131072, + "created_at": "2025-07-11T19:47:32.565514+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -106007,6 +105196,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -106014,68 +105204,65 @@ "type_function": true } }, - "has_chat_completions": false, + "has_chat_completions": true, "has_completions": false, - "id": "797ebc09-6e53-4795-9d9e-2669ce7aee03", + "id": "f5265deb-6c78-49da-97e8-0f0b3dae6c5b", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 50, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 128000, - "created_at": "2025-01-23T20:12:49.780212+00:00", + "author": "moonshotai", + "context_length": 131072, + "created_at": "2025-07-11T19:47:32.565514+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "hf_slug": "moonshotai/Kimi-K2-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Llama 70B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2 0711", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-llama-70b", + "permaslug": "moonshotai/kimi-k2", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Llama 70B", - "slug": "deepseek/deepseek-r1-distill-llama-70b", + "short_name": "Kimi K2 0711", + "slug": "moonshotai/kimi-k2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-distill-llama-70b", - "model_variant_slug": "deepseek/deepseek-r1-distill-llama-70b", + "model_variant_permaslug": "moonshotai/kimi-k2", + "model_variant_slug": "moonshotai/kimi-k2", "moderation_required": false, - "name": "Novita | deepseek/deepseek-r1-distill-llama-70b", + "name": "Novita | moonshotai/kimi-k2", "pricing": { - "completion": "0.0000008", + "completion": "0.0000023", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "prompt": "0.00000057" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -106124,14 +105311,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-r1-distill-llama-70b", + "provider_model_id": "moonshotai/kimi-k2-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -106141,54 +105326,58 @@ "seed", "top_k", "repetition_penalty", + "tools", + "tool_choice", "response_format", "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "hf_slug": "moonshotai/Kimi-K2-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Llama 70B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2 0711", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-llama-70b", + "permaslug": "moonshotai/kimi-k2", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Llama 70B", - "slug": "deepseek/deepseek-r1-distill-llama-70b", + "short_name": "Kimi K2 0711", + "slug": "moonshotai/kimi-k2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 32768, - "created_at": "2025-01-29T23:39:00.13687+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 69.7\n- MATH-500 pass@1: 93.9\n- CodeForces Rating: 1481\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -106197,6 +105386,10 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -106206,7 +105399,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "a18c0e49-6efb-439a-9287-fb2f398f3c5a", + "id": "262402a8-8891-4131-bc3c-a76c4bb9b391", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -106215,57 +105408,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-01-29T23:39:00.13687+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on [Qwen 2.5 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\n\nOther benchmark results include:\n\n- AIME 2024 pass@1: 69.7\n- MATH-500 pass@1: 93.9\n- CodeForces Rating: 1481\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Qwen 14B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-qwen-14b", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Qwen 14B", - "slug": "deepseek/deepseek-r1-distill-qwen-14b", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-distill-qwen-14b", - "model_variant_slug": "deepseek/deepseek-r1-distill-qwen-14b", + "model_variant_permaslug": "moonshotai/kimi-k2-0905", + "model_variant_slug": "moonshotai/kimi-k2-0905", "moderation_required": false, - "name": "Novita | deepseek/deepseek-r1-distill-qwen-14b", + "name": "Novita | moonshotai/kimi-k2-0905", "pricing": { - "completion": "0.00000015", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -106314,14 +105504,13 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-r1-distill-qwen-14b", + "provider_model_id": "moonshotai/kimi-k2-0905", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", + "response_format", "max_tokens", "temperature", "top_p", @@ -106331,54 +105520,61 @@ "seed", "top_k", "repetition_penalty", - "response_format", - "structured_outputs" + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Qwen 14B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-qwen-14b", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Qwen 14B", - "slug": "deepseek/deepseek-r1-distill-qwen-14b", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 64000, - "created_at": "2025-01-29T23:53:50.865297+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\\n\\nOther benchmark results include:\\n\\n- AIME 2024 pass@1: 72.6\\n- MATH-500 pass@1: 94.3\\n- CodeForces Rating: 1691\\n\\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 64000, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -106388,6 +105584,7 @@ }, "features": { "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -106397,7 +105594,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "1a884e3b-b914-433d-8af3-c4db00e8d2f4", + "id": "a5cbd682-c60f-49d4-afda-520de76d9435", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -106406,57 +105603,60 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32000, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 128000, - "created_at": "2025-01-29T23:53:50.865297+00:00", - "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new state-of-the-art results for dense models.\\n\\nOther benchmark results include:\\n\\n- AIME 2024 pass@1: 72.6\\n- MATH-500 pass@1: 94.3\\n- CodeForces Rating: 1691\\n\\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Qwen 32B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-qwen-32b", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Qwen 32B", - "slug": "deepseek/deepseek-r1-distill-qwen-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-distill-qwen-32b", - "model_variant_slug": "deepseek/deepseek-r1-distill-qwen-32b", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "Novita | deepseek/deepseek-r1-distill-qwen-32b", + "name": "Novita | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.0000003", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000015", + "prompt": "0.0000006" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -106505,7 +105705,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "deepseek/deepseek-r1-distill-qwen-32b", + "provider_model_id": "moonshotai/kimi-k2-thinking", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/bf16", @@ -106523,53 +105723,62 @@ "top_k", "repetition_penalty", "response_format", - "structured_outputs" + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Qwen 32B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-qwen-32b", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "R1 Distill Qwen 32B", - "slug": "deepseek/deepseek-r1-distill-qwen-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-13T21:50:25.140801+00:00", - "default_parameters": {}, - "default_stops": ["", "", ""], + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -106578,6 +105787,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -106587,7 +105797,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f83ed95f-d902-4fe8-9f3c-d841539e3364", + "id": "96a2baea-9e77-43d3-8aed-32f3cafc2685", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -106596,49 +105806,58 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-13T21:50:25.140801+00:00", - "default_parameters": {}, - "default_stops": ["", "", ""], + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 12B is the second largest in the family of Gemma 3 models after [Gemma 3 27B](google/gemma-3-27b-it)", - "features": {}, - "group": "Gemini", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "google/gemma-3-12b-it", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 12B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "google/gemma-3-12b-it", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2.5-0127", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "Gemma 3 12B", - "slug": "google/gemma-3-12b-it", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3-12b-it", - "model_variant_slug": "google/gemma-3-12b-it", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "Novita | google/gemma-3-12b-it", + "name": "Novita | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.0000001", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "completion": "0.00000285", + "discount": 0.05, + "input_cache_read": "0.000000095", + "prompt": "0.00000057" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -106687,12 +105906,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "google/gemma-3-12b-it", + "provider_model_id": "moonshotai/kimi-k2.5", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "novita", + "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -106703,49 +105924,56 @@ "top_k", "repetition_penalty", "response_format", - "structured_outputs" + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Gemini", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "google/gemma-3-12b-it", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 12B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "google/gemma-3-12b-it", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2.5-0127", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "Gemma 3 12B", - "slug": "google/gemma-3-12b-it", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { - "author": "google", - "context_length": 98304, - "created_at": "2025-03-12T05:12:39.645813+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["", "", ""], + "author": "nousresearch", + "context_length": 8192, + "created_at": "2024-05-27T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 98304, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -106767,7 +105995,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cf530f36-7cb8-4eb9-8953-880eef952af3", + "id": "6587d520-70bf-4793-87c2-44e9a08fc7f5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -106776,65 +106004,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-12T05:12:39.645813+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["", "", ""], + "author": "nousresearch", + "context_length": 8192, + "created_at": "2024-05-27T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "NousResearch/Hermes-2-Pro-Llama-3-8B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "input_modalities": ["text"], + "instruct_type": "chatml", + "model_version_group_id": null, + "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "nousresearch/hermes-2-pro-llama-3-8b", + "reasoning_config": null, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Hermes 2 Pro - Llama-3 8B", + "slug": "nousresearch/hermes-2-pro-llama-3-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3-27b-it", - "model_variant_slug": "google/gemma-3-27b-it", + "model_variant_permaslug": "nousresearch/hermes-2-pro-llama-3-8b", + "model_variant_slug": "nousresearch/hermes-2-pro-llama-3-8b", "moderation_required": false, - "name": "Novita | google/gemma-3-27b-it", + "name": "Novita | nousresearch/hermes-2-pro-llama-3-8b", "pricing": { - "completion": "0.0000002", + "completion": "0.00000014", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.000000119", - "request": "0", - "web_search": "0" + "prompt": "0.00000014" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -106883,12 +106090,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "google/gemma-3-27b-it", + "provider_model_id": "nousresearch/hermes-2-pro-llama-3-8b", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp16", + "quantization": "fp16", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -106905,40 +106114,29 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Gemini", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "NousResearch/Hermes-2-Pro-Llama-3-8B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "input_modalities": ["text"], + "instruct_type": "chatml", + "model_version_group_id": null, + "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "nousresearch/hermes-2-pro-llama-3-8b", + "reasoning_config": null, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Hermes 2 Pro - Llama-3 8B", + "slug": "nousresearch/hermes-2-pro-llama-3-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "kwaipilot", - "context_length": 256000, - "created_at": "2025-11-10T03:38:32.123517+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -106946,11 +106144,11 @@ }, "default_stops": [], "default_system": null, - "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. \n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 256000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -106959,6 +106157,7 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -106968,7 +106167,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0394900c-1d75-40e9-90ce-e0b2366143da", + "id": "3dc95f88-3960-4c91-b2bb-bbd2f5069c48", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -106977,13 +106176,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "kwaipilot", - "context_length": 262144, - "created_at": "2025-11-10T03:38:32.123517+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -106991,53 +106190,45 @@ }, "default_stops": [], "default_system": null, - "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. \n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Kwaipilot: KAT-Coder-Pro V1", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "kwaipilot/kat-coder-pro-v1", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "KAT-Coder-Pro V1", - "slug": "kwaipilot/kat-coder-pro", - "updated_at": "2026-01-11T23:46:45.088471+00:00", - "warning_message": "The free endpoint has reduced rate limits from January 5-12, after which it will be deprecated. Use [kwaipilot/kat-coder-pro-v1](https://openrouter.ai/kwaipilot/kat-coder-pro) for continued access." + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", + "warning_message": null }, - "model_variant_permaslug": "kwaipilot/kat-coder-pro-v1", - "model_variant_slug": "kwaipilot/kat-coder-pro", + "model_variant_permaslug": "openai/gpt-oss-120b", + "model_variant_slug": "openai/gpt-oss-120b", "moderation_required": false, - "name": "Novita | kwaipilot/kat-coder-pro-v1", + "name": "Novita | openai/gpt-oss-120b", "pricing": { - "completion": "0.000000828", - "discount": 0.31, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000000414", - "internal_reasoning": "0", - "prompt": "0.000000207", - "request": "0", - "web_search": "0" + "completion": "0.00000025", + "discount": 0, + "prompt": "0.00000005" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -107086,12 +106277,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "kwaipilot/kat-coder-pro", + "provider_model_id": "openai/gpt-oss-120b", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp16", - "quantization": "fp16", + "provider_slug": "novita/fp4", + "quantization": "fp4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -107101,61 +106294,63 @@ "seed", "top_k", "repetition_penalty", - "tools", - "tool_choice", "response_format", - "structured_outputs" + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", + "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Kwaipilot: KAT-Coder-Pro V1", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "kwaipilot/kat-coder-pro-v1", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "KAT-Coder-Pro V1", - "slug": "kwaipilot/kat-coder-pro", - "updated_at": "2026-01-11T23:46:45.088471+00:00", - "warning_message": "The free endpoint has reduced rate limits from January 5-12, after which it will be deprecated. Use [kwaipilot/kat-coder-pro-v1](https://openrouter.ai/kwaipilot/kat-coder-pro) for continued access." + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", + "warning_message": null }, { - "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -107164,6 +106359,7 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -107173,7 +106369,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5a9af169-99c1-4922-ad8f-06c4b22b8817", + "id": "c3e700d0-32ff-45cb-a018-c57f16219648", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -107182,49 +106378,61 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-70B-Instruct", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-70b-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-20b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3 70B Instruct", - "slug": "meta-llama/llama-3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3-70b-instruct", + "model_variant_permaslug": "openai/gpt-oss-20b", + "model_variant_slug": "openai/gpt-oss-20b", "moderation_required": false, - "name": "Novita | meta-llama/llama-3-70b-instruct", + "name": "Novita | openai/gpt-oss-20b", "pricing": { - "completion": "0.00000074", + "completion": "0.00000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000051", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -107273,12 +106481,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "meta-llama/llama-3-70b-instruct", + "provider_model_id": "openai/gpt-oss-20b", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/fp4", + "quantization": "fp4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -107292,42 +106502,55 @@ "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-70B-Instruct", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-70b-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-20b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3 70B Instruct", - "slug": "meta-llama/llama-3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "author": "qwen", + "context_length": 32768, + "created_at": "2025-02-01T11:45:11.997326+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -107336,6 +106559,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -107345,7 +106569,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "425a0bee-ae8c-476b-837a-df27836d0f9d", + "id": "35e17b4f-1d57-4c14-84fd-985eb6a0bb27", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -107354,49 +106578,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "author": "qwen", + "context_length": 131072, + "created_at": "2025-02-01T11:45:11.997326+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", "features": {}, - "group": "Llama3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", + "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3 8B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Qwen: Qwen2.5 VL 72B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-8b-instruct", + "permaslug": "qwen/qwen2.5-vl-72b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 8B Instruct", - "slug": "meta-llama/llama-3-8b-instruct", + "short_name": "Qwen2.5 VL 72B Instruct", + "slug": "qwen/qwen2.5-vl-72b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3-8b-instruct", - "model_variant_slug": "meta-llama/llama-3-8b-instruct", + "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", "moderation_required": false, - "name": "Novita | meta-llama/llama-3-8b-instruct", + "name": "Novita | qwen/qwen2.5-vl-72b-instruct", "pricing": { - "completion": "0.00000004", + "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000008" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -107445,7 +106664,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "meta-llama/llama-3-8b-instruct", + "provider_model_id": "qwen/qwen2.5-vl-72b-instruct", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/bf16", @@ -107468,36 +106687,36 @@ "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", + "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3 8B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Qwen: Qwen2.5 VL 72B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-8b-instruct", + "permaslug": "qwen/qwen2.5-vl-72b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 8B Instruct", - "slug": "meta-llama/llama-3-8b-instruct", + "short_name": "Qwen2.5 VL 72B Instruct", + "slug": "qwen/qwen2.5-vl-72b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 16384, - "created_at": "2024-07-23T00:00:00+00:00", + "author": "qwen", + "context_length": 131072, + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 16384, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -107515,7 +106734,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "baecce7a-5028-432e-aaa8-d78b5b700929", + "id": "cf186489-d252-4fb7-aca9-87ef7b557eff", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -107528,45 +106747,50 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 8B instruct-tuned version is fast and efficient.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-8b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-8b-instruct", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "Novita | meta-llama/llama-3.1-8b-instruct", + "name": "Novita | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.00000005", + "completion": "0.00000058", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000009" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -107615,7 +106839,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "meta-llama/llama-3.1-8b-instruct", + "provider_model_id": "qwen/qwen3-235b-a22b-instruct-2507", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", @@ -107629,45 +106853,63 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3.1 8B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-8b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.1 8B Instruct", - "slug": "meta-llama/llama-3.1-8b-instruct", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 32768, - "created_at": "2024-09-25T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "qwen", + "context_length": 131072, + "created_at": "2025-07-25T13:19:17.179049+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -107676,7 +106918,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -107686,7 +106927,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8e659751-8b3c-41a3-a406-740e20584e06", + "id": "73c2b27b-a51d-4bab-b54d-93ce28732702", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -107695,49 +106936,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32000, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-25T13:19:17.179049+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Meta: Llama 3.2 3B Instruct", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-3b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.2 3B Instruct", - "slug": "meta-llama/llama-3.2-3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.2-3b-instruct", - "model_variant_slug": "meta-llama/llama-3.2-3b-instruct", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", "moderation_required": false, - "name": "Novita | meta-llama/llama-3.2-3b-instruct", + "name": "Novita | qwen/qwen3-235b-a22b-thinking-2507", "pricing": { - "completion": "0.00000005", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -107786,12 +107037,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "meta-llama/llama-3.2-3b-instruct", + "provider_model_id": "qwen/qwen3-235b-a22b-thinking-2507", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -107805,42 +107058,57 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Meta: Llama 3.2 3B Instruct", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-3b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.2 3B Instruct", - "slug": "meta-llama/llama-3.2-3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "qwen", + "context_length": 40960, + "created_at": "2025-04-28T22:16:44.177326+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 40960, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -107859,7 +107127,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8f74ff86-f435-4f1b-aea0-f7869c7eaa51", + "id": "fabd48b7-f536-4c69-b103-f11528c3f57c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -107868,50 +107136,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 120000, + "max_completion_tokens": 20000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", + "author": "qwen", "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "created_at": "2025-04-28T22:16:44.177326+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", - "features": {}, - "group": "Llama3", + "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "Qwen/Qwen3-30B-A3B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": "qwen3", + "model_version_group_id": null, + "name": "Qwen: Qwen3 30B A3B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-30b-a3b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 30B A3B", + "slug": "qwen/qwen3-30b-a3b", + "updated_at": "2026-01-08T19:57:57.475571+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", + "model_variant_slug": "qwen/qwen3-30b-a3b", "moderation_required": false, - "name": "Novita | meta-llama/llama-3.3-70b-instruct", + "name": "Novita | qwen/qwen3-30b-a3b-04-28", "pricing": { - "completion": "0.0000004", + "completion": "0.00000045", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.000000135", - "request": "0", - "web_search": "0" + "prompt": "0.00000009" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -107960,12 +107237,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "meta-llama/llama-3.3-70b-instruct", + "provider_model_id": "qwen/qwen3-30b-a3b-fp8", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -107974,47 +107253,56 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty", - "tools", - "tool_choice" + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "Qwen/Qwen3-30B-A3B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": "qwen3", + "model_version_group_id": null, + "name": "Qwen: Qwen3 30B A3B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-30b-a3b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 30B A3B", + "slug": "qwen/qwen3-30b-a3b", + "updated_at": "2026-01-08T19:57:57.475571+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", + "author": "qwen", + "context_length": 40960, + "created_at": "2025-04-28T21:32:25.189881+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 1048576, + "context_length": 40960, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -108026,14 +107314,14 @@ "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, + "literal_none": true, "literal_required": true, - "type_function": false + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "52093e97-a003-4dfa-b247-53ead8bb300c", + "id": "d342f1c5-34b5-4a5c-b2ea-a0cdcbf72e70", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -108042,50 +107330,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 20000, "max_prompt_tokens": null, - "max_tokens_per_image": 3342, + "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", + "author": "qwen", + "context_length": 131072, + "created_at": "2025-04-28T21:32:25.189881+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", - "features": {}, - "group": "Llama4", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "" + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-32b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "" + }, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "model_variant_slug": "meta-llama/llama-4-maverick", + "model_variant_permaslug": "qwen/qwen3-32b-04-28", + "model_variant_slug": "qwen/qwen3-32b", "moderation_required": false, - "name": "Novita | meta-llama/llama-4-maverick-17b-128e-instruct", + "name": "Novita | qwen/qwen3-32b-04-28", "pricing": { - "completion": "0.00000085", + "completion": "0.00000045", "discount": 0, - "image": "0.0006684", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -108134,12 +107424,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "meta-llama/llama-4-maverick-17b-128e-instruct-fp8", + "provider_model_id": "qwen/qwen3-32b-fp8", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -108151,42 +107443,50 @@ "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama4", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "" + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-32b-04-28", + "reasoning_config": { + "end_token": "", + "start_token": "" + }, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2025-04-05T19:31:59.735804+00:00", + "author": "qwen", + "context_length": 160000, + "created_at": "2025-07-31T14:32:59.359308+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 160000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -108195,7 +107495,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -108205,7 +107504,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "b0352c4a-a51c-4ee0-9f5e-cfa527c3a208", + "id": "ded7fa7f-5261-4772-87fb-96994d95c6b9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -108214,49 +107513,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 10000000, - "created_at": "2025-04-05T19:31:59.735804+00:00", + "author": "qwen", + "context_length": 0, + "created_at": "2025-07-31T14:32:59.359308+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", - "features": {}, - "group": "Llama4", + "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Qwen3 Coder 30B A3B Instruct", + "slug": "qwen/qwen3-coder-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "model_variant_slug": "meta-llama/llama-4-scout", + "model_variant_permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "model_variant_slug": "qwen/qwen3-coder-30b-a3b-instruct", "moderation_required": false, - "name": "Novita | meta-llama/llama-4-scout-17b-16e-instruct", + "name": "Novita | qwen/qwen3-coder-30b-a3b-instruct", "pricing": { - "completion": "0.00000059", + "completion": "0.00000027", "discount": 0, - "image": "0.0003342", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000018", - "request": "0", - "web_search": "0" + "prompt": "0.00000007" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -108305,11 +107609,11 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "meta-llama/llama-4-scout-17b-16e-instruct", + "provider_model_id": "qwen/qwen3-coder-30b-a3b-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -108319,45 +107623,59 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama4", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Qwen3 Coder 30B A3B Instruct", + "slug": "qwen/qwen3-coder-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "minimax", - "context_length": 1000000, - "created_at": "2025-06-17T22:46:54.257159+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 1000000, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -108366,9 +107684,6 @@ "training": false }, "features": { - "supported_parameters": { - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -108378,7 +107693,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "aaecb87e-e042-4c74-836b-936baab09de1", + "id": "223e0b1b-e924-4da4-8d45-74cc9deeb40f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -108387,57 +107702,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 40000, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "minimax", - "context_length": 1000000, - "created_at": "2025-06-17T22:46:54.257159+00:00", + "author": "qwen", + "context_length": 1048576, + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M1", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m1", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "MiniMax M1", - "slug": "minimax/minimax-m1", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m1", - "model_variant_slug": "minimax/minimax-m1", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "model_variant_slug": "qwen/qwen3-coder", "moderation_required": false, - "name": "Novita | minimax/minimax-m1", + "name": "Novita | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { - "completion": "0.00000176", + "completion": "0.0000013", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000044", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -108486,14 +107798,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "minimaxai/minimax-m1-80k", + "provider_model_id": "qwen/qwen3-coder-480b-a35b-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -108504,45 +107814,49 @@ "top_k", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M1", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m1", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "MiniMax M1", - "slug": "minimax/minimax-m1", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-10-23T20:41:33.120854+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2026-02-04T00:15:01.820167+00:00", "default_parameters": { "frequency_penalty": null, "temperature": 1, @@ -108550,11 +107864,11 @@ }, "default_stops": [], "default_system": null, - "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment.\n\nThe model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 204800, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -108563,9 +107877,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "content-string", - "should_send_reasoning_text_in_text_content": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -108575,7 +107886,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "09da61cf-71a6-4717-9ebd-857e7203e3b4", + "id": "b4755e91-8718-4dc6-82e8-5e8d37de8598", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -108584,13 +107895,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-10-23T20:41:33.120854+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2026-02-04T00:15:01.820167+00:00", "default_parameters": { "frequency_penalty": null, "temperature": 1, @@ -108598,53 +107909,43 @@ }, "default_stops": [], "default_system": null, - "description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment.\n\nThe model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "Other", + "group": "Qwen", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2", + "hf_slug": "Qwen/Qwen3-Coder-Next", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M2", + "name": "Qwen: Qwen3 Coder Next", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2", + "permaslug": "qwen/qwen3-coder-next-2025-02-03", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "MiniMax M2", - "slug": "minimax/minimax-m2", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Coder Next", + "slug": "qwen/qwen3-coder-next", + "updated_at": "2026-02-04T00:27:00.409072+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m2", - "model_variant_slug": "minimax/minimax-m2", + "model_variant_permaslug": "qwen/qwen3-coder-next-2025-02-03", + "model_variant_slug": "qwen/qwen3-coder-next", "moderation_required": false, - "name": "Novita | minimax/minimax-m2", + "name": "Novita | qwen/qwen3-coder-next-2025-02-03", "pricing": { - "completion": "0.0000012", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -108693,14 +107994,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "minimax/minimax-m2", + "provider_model_id": "qwen/qwen3-coder-next", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -108711,62 +108010,56 @@ "top_k", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "Other", + "group": "Qwen", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2", + "hf_slug": "Qwen/Qwen3-Coder-Next", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M2", + "name": "Qwen: Qwen3 Coder Next", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2", + "permaslug": "qwen/qwen3-coder-next-2025-02-03", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "MiniMax M2", - "slug": "minimax/minimax-m2", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Coder Next", + "slug": "qwen/qwen3-coder-next", + "updated_at": "2026-02-04T00:27:00.409072+00:00", "warning_message": null }, { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-12-23T01:56:37+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.9 - }, + "author": "qwen", + "context_length": 131072, + "created_at": "2025-09-11T17:36:53.6379+00:00", + "default_parameters": {}, "default_stops": [], - "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", - "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "default_system": null, + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 204800, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -108775,18 +108068,16 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": false, + "literal_none": true, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "a300fcbb-ec0e-4c1f-a4d4-d06dc8c19cf9", + "id": "fe32eb3f-05ec-415f-8f63-3e05327d1644", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -108795,67 +108086,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "minimax", - "context_length": 204800, - "created_at": "2025-12-23T01:56:37+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.9 - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", + "default_parameters": {}, "default_stops": [], - "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", - "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "default_system": null, + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2.1", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M2.1", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2.1", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "MiniMax M2.1", - "slug": "minimax/minimax-m2.1", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m2.1", - "model_variant_slug": "minimax/minimax-m2.1", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", "moderation_required": false, - "name": "Novita | minimax/minimax-m2.1", + "name": "Novita | qwen/qwen3-next-80b-a3b-instruct-2509", "pricing": { - "completion": "0.0000012", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -108904,14 +108182,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "minimax/minimax-m2.1", + "provider_model_id": "qwen/qwen3-next-80b-a3b-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -108921,62 +108197,62 @@ "seed", "top_k", "repetition_penalty", + "response_format", + "structured_outputs", "tools", - "tool_choice", - "response_format" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": true - }, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "MiniMaxAI/MiniMax-M2.1", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M2.1", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m2.1", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "MiniMax M2.1", - "slug": "minimax/minimax-m2.1", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 60288, - "created_at": "2024-07-19T00:00:00+00:00", + "author": "qwen", + "context_length": 131072, + "created_at": "2025-09-11T17:38:04.192907+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 60288, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -108985,7 +108261,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -108995,7 +108270,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "b61351b1-7c99-4bab-a6c8-87cb4807cc66", + "id": "9331a6a3-9a03-4527-aed5-9ce3c213a6f3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -109004,51 +108279,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16000, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 131072, - "created_at": "2024-07-19T00:00:00+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:38:04.192907+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA.\n\nThe model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Korean, Arabic, and Hindi.\n\nIt supports function calling and is released under the Apache 2.0 license.", - "features": {}, - "group": "Mistral", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Nemo", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-nemo", - "reasoning_config": null, + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Nemo", - "slug": "mistralai/mistral-nemo", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-nemo", - "model_variant_slug": "mistralai/mistral-nemo", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", "moderation_required": false, - "name": "Novita | mistralai/mistral-nemo", + "name": "Novita | qwen/qwen3-next-80b-a3b-thinking-2509", "pricing": { - "completion": "0.00000017", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -109097,12 +108380,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "mistralai/mistral-nemo", + "provider_model_id": "qwen/qwen3-next-80b-a3b-thinking", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -109113,41 +108398,58 @@ "top_k", "repetition_penalty", "response_format", - "structured_outputs" + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Nemo-Instruct-2407", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Nemo", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-nemo", - "reasoning_config": null, + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Nemo", - "slug": "mistralai/mistral-nemo", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "moonshotai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-07-11T19:47:32.565514+00:00", - "default_parameters": {}, + "created_at": "2025-09-23T23:04:47+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.7, + "top_p": 0.8 + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, @@ -109160,7 +108462,11 @@ "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -109169,27 +108475,31 @@ } }, "has_chat_completions": true, - "has_completions": false, - "id": "f5265deb-6c78-49da-97e8-0f0b3dae6c5b", + "has_completions": true, + "id": "7f224e57-ba59-49bb-92f7-0fe85eff0e72", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 50, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-07-11T19:47:32.565514+00:00", - "default_parameters": {}, + "created_at": "2025-09-23T23:04:47+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.7, + "top_p": 0.8 + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "features": { "reasoning_config": { "end_token": null, @@ -109197,42 +108507,36 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0711", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2", + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0711", - "slug": "moonshotai/kimi-k2", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2", - "model_variant_slug": "moonshotai/kimi-k2", + "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", + "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", "moderation_required": false, - "name": "Novita | moonshotai/kimi-k2", + "name": "Novita | qwen/qwen3-vl-235b-a22b-instruct", "pricing": { - "completion": "0.0000023", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000057", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -109281,12 +108585,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "moonshotai/kimi-k2-instruct", + "provider_model_id": "qwen/qwen3-vl-235b-a22b-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -109297,9 +108603,7 @@ "top_k", "repetition_penalty", "tools", - "tool_choice", - "response_format", - "structured_outputs" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, @@ -109314,40 +108618,44 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0711", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2", + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0711", - "slug": "moonshotai/kimi-k2", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, + "author": "qwen", + "context_length": 131072, + "created_at": "2025-09-23T23:04:50+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -109357,7 +108665,7 @@ }, "features": { "supported_parameters": { - "response_format": true, + "response_format": false, "structured_outputs": false }, "supports_tool_choice": { @@ -109369,7 +108677,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "262402a8-8891-4131-bc3c-a76c4bb9b391", + "id": "c611a942-6a7c-43c7-b26e-d91cf0fb7b83", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -109378,59 +108686,58 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, + "author": "qwen", + "context_length": 131072, + "created_at": "2025-09-23T23:04:50+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "features": { "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen3 VL 235B A22B Thinking", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "qwen/qwen3-vl-235b-a22b-thinking", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 VL 235B A22B Thinking", + "slug": "qwen/qwen3-vl-235b-a22b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-thinking", + "model_variant_slug": "qwen/qwen3-vl-235b-a22b-thinking", "moderation_required": false, - "name": "Novita | moonshotai/kimi-k2-0905", + "name": "Novita | qwen/qwen3-vl-235b-a22b-thinking", "pricing": { - "completion": "0.0000025", + "completion": "0.00000395", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000098" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -109479,13 +108786,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "moonshotai/kimi-k2-0905", + "provider_model_id": "qwen/qwen3-vl-235b-a22b-thinking", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ - "response_format", + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -109494,62 +108802,59 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty", - "structured_outputs", - "tools", - "tool_choice" + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen3 VL 235B A22B Thinking", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "qwen/qwen3-vl-235b-a22b-thinking", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 VL 235B A22B Thinking", + "slug": "qwen/qwen3-vl-235b-a22b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "author": "qwen", + "context_length": 131072, + "created_at": "2025-10-06T23:47:56.430294+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -109558,8 +108863,10 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -109569,7 +108876,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "a5cbd682-c60f-49d4-afda-520de76d9435", + "id": "0ae33e05-f2b8-49bd-8a58-f61a835466bd", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -109578,65 +108885,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", + "author": "qwen", "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "created_at": "2025-10-06T23:47:56.430294+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "model_variant_slug": "qwen/qwen3-vl-30b-a3b-instruct", "moderation_required": false, - "name": "Novita | moonshotai/kimi-k2-thinking-20251106", + "name": "Novita | qwen/qwen3-vl-30b-a3b-instruct", "pricing": { - "completion": "0.0000025", + "completion": "0.0000007", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000015", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -109685,14 +108986,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "moonshotai/kimi-k2-thinking", + "provider_model_id": "qwen/qwen3-vl-30b-a3b-instruct", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/bf16", "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -109702,13 +109003,11 @@ "seed", "top_k", "repetition_penalty", - "response_format", - "structured_outputs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -109716,45 +109015,48 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "gryphe", - "context_length": 4096, - "created_at": "2023-07-02T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "qwen", + "context_length": 131072, + "created_at": "2025-10-06T23:47:59.575824+00:00", + "default_parameters": { + "temperature": 0.8, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", + "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 4096, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -109763,6 +109065,11 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -109772,7 +109079,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "285f869c-d6ef-4087-904c-b376da96e65d", + "id": "38ee90ad-31ec-4a90-925c-a95d9829636a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -109781,49 +109088,57 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 3200, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "gryphe", - "context_length": 4096, - "created_at": "2023-07-02T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["###", ""], + "author": "qwen", + "context_length": 0, + "created_at": "2025-10-06T23:47:59.575824+00:00", + "default_parameters": { + "temperature": 0.8, + "top_p": 0.95 + }, + "default_stops": [], "default_system": null, - "description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge", - "features": {}, - "group": "Llama2", + "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Gryphe/MythoMax-L2-13b", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "alpaca", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "MythoMax 13B", + "name": "Qwen: Qwen3 VL 30B A3B Thinking", "output_modalities": ["text"], - "permaslug": "gryphe/mythomax-l2-13b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-vl-30b-a3b-thinking", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "MythoMax 13B", - "slug": "gryphe/mythomax-l2-13b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 VL 30B A3B Thinking", + "slug": "qwen/qwen3-vl-30b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "gryphe/mythomax-l2-13b", - "model_variant_slug": "gryphe/mythomax-l2-13b", + "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-thinking", + "model_variant_slug": "qwen/qwen3-vl-30b-a3b-thinking", "moderation_required": false, - "name": "Novita | gryphe/mythomax-l2-13b", + "name": "Novita | qwen/qwen3-vl-30b-a3b-thinking", "pricing": { - "completion": "0.00000009", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -109872,12 +109187,16 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "gryphe/mythomax-l2-13b", + "provider_model_id": "qwen/qwen3-vl-30b-a3b-thinking", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp16", "quantization": "fp16", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -109886,45 +109205,61 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama2", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Gryphe/MythoMax-L2-13b", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "alpaca", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "MythoMax 13B", + "name": "Qwen: Qwen3 VL 30B A3B Thinking", "output_modalities": ["text"], - "permaslug": "gryphe/mythomax-l2-13b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-vl-30b-a3b-thinking", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "MythoMax 13B", - "slug": "gryphe/mythomax-l2-13b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 VL 30B A3B Thinking", + "slug": "qwen/qwen3-vl-30b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "nousresearch", - "context_length": 8192, - "created_at": "2024-05-27T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "qwen", + "context_length": 131072, + "created_at": "2025-10-14T17:35:08.402158+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.7, + "top_p": 0.8 + }, + "default_stops": [], "default_system": null, - "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", + "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -109946,7 +109281,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "6587d520-70bf-4793-87c2-44e9a08fc7f5", + "id": "6bfd6b5b-ff8c-436f-ae3f-d28c02bcfce3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -109955,49 +109290,58 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nousresearch", - "context_length": 8192, - "created_at": "2024-05-27T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "qwen", + "context_length": 256000, + "created_at": "2025-10-14T17:35:08.402158+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.7, + "top_p": 0.8 + }, + "default_stops": [], "default_system": null, - "description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house.", - "features": {}, - "group": "Llama3", + "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-2-Pro-Llama-3-8B", + "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", + "input_modalities": ["image", "text"], + "instruct_type": null, "model_version_group_id": null, - "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", + "name": "Qwen: Qwen3 VL 8B Instruct", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-2-pro-llama-3-8b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-vl-8b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Hermes 2 Pro - Llama-3 8B", - "slug": "nousresearch/hermes-2-pro-llama-3-8b", + "short_name": "Qwen3 VL 8B Instruct", + "slug": "qwen/qwen3-vl-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nousresearch/hermes-2-pro-llama-3-8b", - "model_variant_slug": "nousresearch/hermes-2-pro-llama-3-8b", + "model_variant_permaslug": "qwen/qwen3-vl-8b-instruct", + "model_variant_slug": "qwen/qwen3-vl-8b-instruct", "moderation_required": false, - "name": "Novita | nousresearch/hermes-2-pro-llama-3-8b", + "name": "Novita | qwen/qwen3-vl-8b-instruct", "pricing": { - "completion": "0.00000014", + "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000014", - "request": "0", - "web_search": "0" + "prompt": "0.00000008" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -110046,11 +109390,11 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "nousresearch/hermes-2-pro-llama-3-8b", + "provider_model_id": "qwen/qwen3-vl-8b-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp16", - "quantization": "fp16", + "provider_slug": "novita/fp8", + "quantization": "fp8", "supported_parameters": [ "structured_outputs", "response_format", @@ -110062,49 +109406,57 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "NousResearch/Hermes-2-Pro-Llama-3-8B", + "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", + "input_modalities": ["image", "text"], + "instruct_type": null, "model_version_group_id": null, - "name": "NousResearch: Hermes 2 Pro - Llama-3 8B", + "name": "Qwen: Qwen3 VL 8B Instruct", "output_modalities": ["text"], - "permaslug": "nousresearch/hermes-2-pro-llama-3-8b", - "reasoning_config": null, + "permaslug": "qwen/qwen3-vl-8b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Hermes 2 Pro - Llama-3 8B", - "slug": "nousresearch/hermes-2-pro-llama-3-8b", + "short_name": "Qwen3 VL 8B Instruct", + "slug": "qwen/qwen3-vl-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 32000, + "created_at": "2024-09-19T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 32000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -110113,7 +109465,7 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -110123,7 +109475,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3dc95f88-3960-4c91-b2bb-bbd2f5069c48", + "id": "a2a8a9fd-2784-448f-99b8-076fb6e1d8e5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -110132,65 +109484,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "created_at": "2024-09-19T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "GPT", + "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen2.5-72B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen2.5 72B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen-2.5-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen2.5 72B Instruct", + "slug": "qwen/qwen-2.5-72b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "qwen/qwen-2.5-72b-instruct", + "model_variant_slug": "qwen/qwen-2.5-72b-instruct", "moderation_required": false, - "name": "Novita | openai/gpt-oss-120b", + "name": "Novita | qwen/qwen-2.5-72b-instruct", "pricing": { - "completion": "0.00000025", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000038" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -110239,14 +109570,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "openai/gpt-oss-120b", + "provider_model_id": "qwen/qwen-2.5-72b-instruct", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp4", - "quantization": "fp4", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -110262,57 +109591,42 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "GPT", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen2.5-72B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen2.5 72B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen-2.5-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen2.5 72B Instruct", + "slug": "qwen/qwen-2.5-72b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "sao10k", + "context_length": 8192, + "created_at": "2024-08-13T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -110321,7 +109635,10 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -110331,7 +109648,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c3e700d0-32ff-45cb-a018-c57f16219648", + "id": "d5d16ef3-adbc-4e45-8421-bc7d343e902f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -110340,67 +109657,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "sao10k", + "context_length": 8192, + "created_at": "2024-08-13T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "GPT", + "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Sao10K/L3-8B-Lunaris-v1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Sao10K: Llama 3 8B Lunaris", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "sao10k/l3-lunaris-8b", + "reasoning_config": null, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3 8B Lunaris", + "slug": "sao10k/l3-lunaris-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "sao10k/l3-lunaris-8b", + "model_variant_slug": "sao10k/l3-lunaris-8b", "moderation_required": false, - "name": "Novita | openai/gpt-oss-20b", + "name": "Novita | sao10k/l3-lunaris-8b", "pricing": { - "completion": "0.00000015", + "completion": "0.00000005", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000005" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -110449,14 +109743,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "openai/gpt-oss-20b", + "provider_model_id": "sao10k/l3-8b-lunaris", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp4", - "quantization": "fp4", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -110465,60 +109759,45 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty", - "response_format", - "structured_outputs" + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "GPT", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Sao10K/L3-8B-Lunaris-v1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Sao10K: Llama 3 8B Lunaris", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "sao10k/l3-lunaris-8b", + "reasoning_config": null, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3 8B Lunaris", + "slug": "sao10k/l3-lunaris-8b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32768, - "created_at": "2025-02-01T11:45:11.997326+00:00", + "author": "sao10k", + "context_length": 8192, + "created_at": "2024-06-18T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).\n\n- Better prompt adherence.\n- Better anatomy / spatial awareness.\n- Adapts much better to unique and custom formatting / reply formats.\n- Very creative, lots of unique swipes.\n- Is not restrictive during roleplays.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -110527,7 +109806,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -110537,7 +109815,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "35e17b4f-1d57-4c14-84fd-985eb6a0bb27", + "id": "2ab67dc9-421b-408e-b61a-b4b86ea1df70", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -110546,49 +109824,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-02-01T11:45:11.997326+00:00", + "author": "sao10k", + "context_length": 8192, + "created_at": "2024-06-18T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).\n\n- Better prompt adherence.\n- Better anatomy / spatial awareness.\n- Adapts much better to unique and custom formatting / reply formats.\n- Very creative, lots of unique swipes.\n- Is not restrictive during roleplays.", "features": {}, - "group": "Qwen", + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "Sao10K/L3-70B-Euryale-v2.1", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "Sao10k: Llama 3 Euryale 70B v2.1", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "permaslug": "sao10k/l3-euryale-70b", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", + "short_name": "Llama 3 Euryale 70B v2.1", + "slug": "sao10k/l3-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", - "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_permaslug": "sao10k/l3-euryale-70b", + "model_variant_slug": "sao10k/l3-euryale-70b", "moderation_required": false, - "name": "Novita | qwen/qwen2.5-vl-72b-instruct", + "name": "Novita | sao10k/l3-euryale-70b", "pricing": { - "completion": "0.0000008", + "completion": "0.00000148", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "prompt": "0.00000148" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -110637,7 +109910,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen2.5-vl-72b-instruct", + "provider_model_id": "sao10k/l3-70b-euryale-v2.1", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/bf16", @@ -110651,45 +109924,47 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Qwen", + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "Sao10K/L3-70B-Euryale-v2.1", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "Sao10k: Llama 3 Euryale 70B v2.1", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "permaslug": "sao10k/l3-euryale-70b", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", + "short_name": "Llama 3 Euryale 70B v2.1", + "slug": "sao10k/l3-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-07-21T17:39:15.880992+00:00", + "author": "sao10k", + "context_length": 8192, + "created_at": "2024-08-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -110707,7 +109982,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cf186489-d252-4fb7-aca9-87ef7b557eff", + "id": "9170f475-2eac-4b4d-950d-22be425df236", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -110716,59 +109991,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 8192, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", + "author": "sao10k", + "context_length": 131072, + "created_at": "2024-08-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "sao10k/l3.1-euryale-70b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Llama 3.1 Euryale 70B v2.2", + "slug": "sao10k/l3.1-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "sao10k/l3.1-euryale-70b", + "model_variant_slug": "sao10k/l3.1-euryale-70b", "moderation_required": false, - "name": "Novita | qwen/qwen3-235b-a22b-07-25", + "name": "Novita | sao10k/l3.1-euryale-70b", "pricing": { - "completion": "0.00000058", + "completion": "0.00000148", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000148" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -110817,7 +110077,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen3-235b-a22b-instruct-2507", + "provider_model_id": "sao10k/l31-70b-euryale-v2.2", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", @@ -110832,8 +110092,6 @@ "seed", "top_k", "repetition_penalty", - "response_format", - "structured_outputs", "tools", "tool_choice" ], @@ -110843,51 +110101,37 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "sao10k/l3.1-euryale-70b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Llama 3.1 Euryale 70B v2.2", + "slug": "sao10k/l3.1-euryale-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-07-25T13:19:17.179049+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "microsoft", + "context_length": 65535, + "created_at": "2024-04-16T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["USER:", ""], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 65535, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -110905,7 +110149,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "73c2b27b-a51d-4bab-b54d-93ce28732702", + "id": "92d0658f-b48a-4746-96e7-75ea71d5e060", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -110914,65 +110158,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 8000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-25T13:19:17.179049+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "microsoft", + "context_length": 65536, + "created_at": "2024-04-16T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["USER:", ""], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": "microsoft/WizardLM-2-8x22B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": "vicuna", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "WizardLM-2 8x22B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "microsoft/wizardlm-2-8x22b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "WizardLM-2 8x22B", + "slug": "microsoft/wizardlm-2-8x22b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", - "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_permaslug": "microsoft/wizardlm-2-8x22b", + "model_variant_slug": "microsoft/wizardlm-2-8x22b", "moderation_required": false, - "name": "Novita | qwen/qwen3-235b-a22b-thinking-2507", + "name": "Novita | microsoft/wizardlm-2-8x22b", "pricing": { - "completion": "0.000003", + "completion": "0.00000062", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000062" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -111021,14 +110244,12 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen3-235b-a22b-thinking-2507", + "provider_model_id": "microsoft/wizardlm-2-8x22b", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -111037,62 +110258,49 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty", - "tools", - "tool_choice" + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": "microsoft/WizardLM-2-8x22B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": "vicuna", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "WizardLM-2 8x22B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "microsoft/wizardlm-2-8x22b", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "WizardLM-2 8x22B", + "slug": "microsoft/wizardlm-2-8x22b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "author": "xiaomi", + "context_length": 262144, + "created_at": "2025-12-14T16:55:08+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, - "top_p": null + "top_p": 0.95 }, - "default_stops": ["<|im_start|>", "<|im_end|>"], - "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "default_stops": [], + "default_system": "You are MiMo, an AI assistant developed by Xiaomi.\n\nYour knowledge cutoff date is December 2024.", + "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config).", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 40960, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -111101,17 +110309,17 @@ "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, - "type_function": true + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "fabd48b7-f536-4c69-b103-f11528c3f57c", + "id": "02f4b80a-d402-4254-94dc-b6fd10940a21", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -111120,21 +110328,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 20000, + "max_completion_tokens": 32000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", + "author": "xiaomi", + "context_length": 262144, + "created_at": "2025-12-14T16:55:08+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, - "top_p": null + "top_p": 0.95 }, - "default_stops": ["<|im_start|>", "<|im_end|>"], - "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "default_stops": [], + "default_system": "You are MiMo, an AI assistant developed by Xiaomi.\n\nYour knowledge cutoff date is December 2024.", + "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config).", "features": { "chat_template_config": {}, "reasoning_config": { @@ -111143,41 +110351,37 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "XiaomiMiMo/MiMo-V2-Flash", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "Xiaomi: MiMo-V2-Flash", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "xiaomi/mimo-v2-flash-20251210", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "MiMo-V2-Flash", + "slug": "xiaomi/mimo-v2-flash", + "updated_at": "2026-01-21T16:26:04.702272+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", - "model_variant_slug": "qwen/qwen3-30b-a3b", + "model_variant_permaslug": "xiaomi/mimo-v2-flash-20251210", + "model_variant_slug": "xiaomi/mimo-v2-flash", "moderation_required": false, - "name": "Novita | qwen/qwen3-30b-a3b-04-28", + "name": "Novita | xiaomi/mimo-v2-flash-20251210", "pricing": { - "completion": "0.00000045", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000002", + "prompt": "0.0000001" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -111226,11 +110430,11 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen3-30b-a3b-fp8", + "provider_model_id": "xiaomimimo/mimo-v2-flash", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita", + "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", @@ -111242,11 +110446,14 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, @@ -111258,40 +110465,44 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "XiaomiMiMo/MiMo-V2-Flash", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "Xiaomi: MiMo-V2-Flash", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "xiaomi/mimo-v2-flash-20251210", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "MiMo-V2-Flash", + "slug": "xiaomi/mimo-v2-flash", + "updated_at": "2026-01-21T16:26:04.702272+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "author": "z-ai", + "context_length": 131072, + "created_at": "2025-07-25T19:22:27.278283+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 40960, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -111300,7 +110511,7 @@ "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -111310,7 +110521,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "d342f1c5-34b5-4a5c-b2ea-a0cdcbf72e70", + "id": "e4ada6ec-12e7-49c9-837c-398dee612403", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -111319,57 +110530,60 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 20000, + "max_completion_tokens": 98304, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "z-ai", "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "created_at": "2025-07-25T19:22:27.278283+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "zai-org/GLM-4.5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "Z.AI: GLM 4.5", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "z-ai/glm-4.5", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.5", + "slug": "z-ai/glm-4.5", + "updated_at": "2026-01-05T22:04:10.598351+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-32b-04-28", - "model_variant_slug": "qwen/qwen3-32b", + "model_variant_permaslug": "z-ai/glm-4.5", + "model_variant_slug": "z-ai/glm-4.5", "moderation_required": false, - "name": "Novita | qwen/qwen3-32b-04-28", + "name": "Novita | z-ai/glm-4.5", "pricing": { - "completion": "0.00000045", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000011", + "prompt": "0.0000006" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -111418,7 +110632,7 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen3-32b-fp8", + "provider_model_id": "zai-org/glm-4.5", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", @@ -111434,57 +110648,62 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "zai-org/GLM-4.5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "Z.AI: GLM 4.5", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "z-ai/glm-4.5", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.5", + "slug": "z-ai/glm-4.5", + "updated_at": "2026-01-05T22:04:10.598351+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 128000, - "created_at": "2025-04-28T21:43:52.421936+00:00", + "author": "z-ai", + "context_length": 131072, + "created_at": "2025-07-25T19:20:58.066206+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, + "temperature": 0.75, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue. It supports seamless switching between \"thinking\" mode for math, coding, and logical inference, and \"non-thinking\" mode for general conversation. The model is fine-tuned for instruction-following, agent integration, creative writing, and multilingual use across 100+ languages and dialects. It natively supports a 32K token context window and can extend to 131K tokens with YaRN scaling.", + "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -111493,7 +110712,12 @@ "training": false }, "features": { - "supported_parameters": {}, + "is_mandatory_reasoning": false, + "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": { + "response_format": true, + "structured_outputs": false + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -111503,7 +110727,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "25334f6f-1e4a-4177-abc3-26260f04e754", + "id": "90fc9bb3-01bb-419f-bb58-dea60a80d201", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -111512,64 +110736,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 20000, + "max_completion_tokens": 98304, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", + "author": "z-ai", "context_length": 131072, - "created_at": "2025-04-28T21:43:52.421936+00:00", + "created_at": "2025-07-25T19:20:58.066206+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, + "temperature": 0.75, "top_p": null }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue. It supports seamless switching between \"thinking\" mode for math, coding, and logical inference, and \"non-thinking\" mode for general conversation. The model is fine-tuned for instruction-following, agent integration, creative writing, and multilingual use across 100+ languages and dialects. It natively supports a 32K token context window and can extend to 131K tokens with YaRN scaling.", + "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-8B", + "hf_slug": "zai-org/GLM-4.5-Air", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 8B", + "name": "Z.AI: GLM 4.5 Air", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-8b-04-28", + "permaslug": "z-ai/glm-4.5-air", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 8B", - "slug": "qwen/qwen3-8b", - "updated_at": "2026-01-08T19:54:39.933523+00:00", + "short_name": "GLM 4.5 Air", + "slug": "z-ai/glm-4.5-air", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-8b-04-28", - "model_variant_slug": "qwen/qwen3-8b", + "model_variant_permaslug": "z-ai/glm-4.5-air", + "model_variant_slug": "z-ai/glm-4.5-air", "moderation_required": false, - "name": "Novita | qwen/qwen3-8b-04-28", + "name": "Novita | z-ai/glm-4.5-air", "pricing": { - "completion": "0.000000138", + "completion": "0.00000085", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000035", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000025", + "prompt": "0.00000013" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -111618,11 +110837,11 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen3-8b-fp8", + "provider_model_id": "zai-org/glm-4.5-air", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ "reasoning", "include_reasoning", @@ -111634,56 +110853,61 @@ "presence_penalty", "seed", "top_k", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-8B", + "hf_slug": "zai-org/GLM-4.5-Air", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 8B", + "name": "Z.AI: GLM 4.5 Air", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-8b-04-28", + "permaslug": "z-ai/glm-4.5-air", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 8B", - "slug": "qwen/qwen3-8b", - "updated_at": "2026-01-08T19:54:39.933523+00:00", + "short_name": "GLM 4.5 Air", + "slug": "z-ai/glm-4.5-air", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 160000, - "created_at": "2025-07-31T14:32:59.359308+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "z-ai", + "context_length": 65536, + "created_at": "2025-08-11T14:24:48.340676+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", + "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 160000, + "context_length": 65536, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -111692,6 +110916,10 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": {}, + "supports_input_audio": false, + "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -111701,7 +110929,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "ded7fa7f-5261-4772-87fb-96994d95c6b9", + "id": "30f10717-f218-430f-a3d7-858bf5fdf303", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -111710,59 +110938,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 0, - "created_at": "2025-07-31T14:32:59.359308+00:00", - "default_parameters": {}, + "author": "z-ai", + "context_length": 65536, + "created_at": "2025-08-11T14:24:48.340676+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", + "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "features": { "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "hf_slug": "zai-org/GLM-4.5V", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 30B A3B Instruct", + "name": "Z.AI: GLM 4.5V", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "permaslug": "z-ai/glm-4.5v", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 30B A3B Instruct", - "slug": "qwen/qwen3-coder-30b-a3b-instruct", + "short_name": "GLM 4.5V", + "slug": "z-ai/glm-4.5v", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-30b-a3b-instruct", - "model_variant_slug": "qwen/qwen3-coder-30b-a3b-instruct", + "model_variant_permaslug": "z-ai/glm-4.5v", + "model_variant_slug": "z-ai/glm-4.5v", "moderation_required": false, - "name": "Novita | qwen/qwen3-coder-30b-a3b-instruct", + "name": "Novita | z-ai/glm-4.5v", "pricing": { - "completion": "0.00000027", + "completion": "0.0000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000007", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000011", + "prompt": "0.0000006" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -111811,12 +111039,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen3-coder-30b-a3b-instruct", + "provider_model_id": "zai-org/glm-4.5v", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -111832,52 +111062,56 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "hf_slug": "zai-org/GLM-4.5V", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 30B A3B Instruct", + "name": "Z.AI: GLM 4.5V", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "permaslug": "z-ai/glm-4.5v", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 30B A3B Instruct", - "slug": "qwen/qwen3-coder-30b-a3b-instruct", + "short_name": "GLM 4.5V", + "slug": "z-ai/glm-4.5v", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-23T00:29:06+00:00", - "default_parameters": {}, + "author": "z-ai", + "context_length": 204800, + "created_at": "2025-09-30T12:32:56.306946+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 204800, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -111886,6 +111120,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -111895,7 +111130,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "223e0b1b-e924-4da4-8d45-74cc9deeb40f", + "id": "562fe77f-c92f-4556-b43b-ea7da6891b6a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -111904,60 +111139,60 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 1048576, - "created_at": "2025-07-23T00:29:06+00:00", - "default_parameters": {}, + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-09-30T12:32:56.306946+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "model_variant_slug": "qwen/qwen3-coder", + "model_variant_permaslug": "z-ai/glm-4.6", + "model_variant_slug": "z-ai/glm-4.6", "moderation_required": false, - "name": "Novita | qwen/qwen3-coder-480b-a35b-07-25", + "name": "Novita | z-ai/glm-4.6", "pricing": { - "completion": "0.0000013", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000011", + "prompt": "0.00000055" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -112006,12 +111241,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen3-coder-480b-a35b-instruct", + "provider_model_id": "zai-org/glm-4.6", "provider_name": "Novita", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "novita/bf16", + "quantization": "bf16", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -112021,54 +111258,59 @@ "seed", "top_k", "repetition_penalty", - "tools", - "tool_choice", "response_format", - "structured_outputs" + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, { - "author": "qwen", + "author": "z-ai", "context_length": 131072, - "created_at": "2025-09-11T17:36:53.6379+00:00", - "default_parameters": {}, + "created_at": "2025-12-08T15:24:22.464154+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.6 + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, @@ -112081,16 +111323,18 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": false }, "has_chat_completions": true, "has_completions": true, - "id": "fe32eb3f-05ec-415f-8f63-3e05327d1644", + "id": "a44ea9fe-fc2b-4765-804b-2e31215b5ca5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -112103,55 +111347,56 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", - "default_parameters": {}, + "author": "z-ai", + "context_length": 131072, + "created_at": "2025-12-08T15:24:22.464154+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.6 + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "hf_slug": "zai-org/GLM-4.6V", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "name": "Z.AI: GLM 4.6V", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "permaslug": "z-ai/glm-4.6-20251208", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6V", + "slug": "z-ai/glm-4.6v", + "updated_at": "2025-12-08T15:45:24.970322+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", + "model_variant_permaslug": "z-ai/glm-4.6-20251208", + "model_variant_slug": "z-ai/glm-4.6v", "moderation_required": false, - "name": "Novita | qwen/qwen3-next-80b-a3b-instruct-2509", + "name": "Novita | z-ai/glm-4.6-20251208", "pricing": { - "completion": "0.0000015", + "completion": "0.0000009", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000055", + "prompt": "0.0000003" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -112200,12 +111445,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen3-next-80b-a3b-instruct", + "provider_model_id": "zai-org/glm-4.6v", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/bf16", "quantization": "bf16", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -112215,268 +111462,203 @@ "seed", "top_k", "repetition_penalty", - "response_format", - "structured_outputs", "tools", - "tool_choice" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "hf_slug": "zai-org/GLM-4.6V", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "name": "Z.AI: GLM 4.6V", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "permaslug": "z-ai/glm-4.6-20251208", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6V", + "slug": "z-ai/glm-4.6v", + "updated_at": "2025-12-08T15:45:24.970322+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-11T17:38:04.192907+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "Other", + "context_length": 202752, + "created_at": "2025-08-26T20:08:47.000Z", + "default_parameters": null, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", + "description": "GLM-4.7 is Z.AI's latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "other", "can_abort": true, - "context_length": 131072, + "context_length": 202752, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", - "training": false - }, - "features": { - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } + "retainsPrompts": true, + "training": true }, + "features": null, "has_chat_completions": true, - "has_completions": true, - "id": "9331a6a3-9a03-4527-aed5-9ce3c213a6f3", + "has_completions": false, + "id": "z-ai/glm-4.7:free", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, - "max_prompt_tokens": null, + "max_completion_tokens": 65535, + "max_prompt_images": null, + "max_prompt_tokens": 202752, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:38:04.192907+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "Other", + "context_length": 202752, + "created_at": "2025-08-26T20:08:47.000Z", + "default_parameters": null, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "GLM-4.7 is Z.AI's latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "features": null, + "group": "other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "Z.AI: GLM 4.7 (free)", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "z-ai/glm-4.7:free", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Z.AI: GLM 4.7 (free)", + "slug": "z-ai/glm-4.7:free", + "updated_at": "2026-02-10T10:49:03.904Z", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", + "model_variant_permaslug": "z-ai/glm-4.7:free", + "model_variant_slug": "z-ai/glm-4.7:free", "moderation_required": false, - "name": "Novita | qwen/qwen3-next-80b-a3b-thinking-2509", + "name": "Z.AI: GLM 4.7 (free)", "pricing": { - "completion": "0.0000015", + "completion": "0.0000000", "discount": 0, "image": "0", "image_output": "0", + "input_cache_read": "0.00000000", "internal_reasoning": "0", - "prompt": "0.00000015", + "prompt": "0.0000000", "request": "0", "web_search": "0" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "Other", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", - "byokEnabled": true, + "adapterName": "other", + "baseUrl": "https://kilo.ai", + "byokEnabled": false, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", - "training": false + "retainsPrompts": true, + "training": true }, - "displayName": "NovitaAI", - "editors": ["{}"], + "displayName": "Other", + "editors": [], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, + "headquarters": "Unknown", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "rounded-sm", + "url": "https://via.placeholder.com/32x32/000000/FFFFFF?text=S" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "name": "Other", + "owners": [], + "slug": "other", + "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-next-80b-a3b-thinking", - "provider_name": "Novita", + "provider_model_id": "z-ai/glm-4.7:free", + "provider_name": "Other", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "other", + "quantization": null, "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "seed", - "top_k", - "repetition_penalty", - "response_format", - "structured_outputs", "tools", - "tool_choice" + "reasoning", + "include_reasoning" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" - }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } + "variant": "default" }, - "group": "Qwen3", + "features": null, + "group": "other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "Z.AI: GLM 4.7 (free)", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "z-ai/glm-4.7:free", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Z.AI: GLM 4.7 (free)", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-02-10T10:49:03.904Z", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:47+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2026-01-19T14:45:13.352372+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", "endpoint": { "adapter_name": "NovitaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", @@ -112485,21 +111667,17 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_multipart": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true + "literal_none": false, + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": true, - "id": "7f224e57-ba59-49bb-92f7-0fe85eff0e72", + "id": "6999081f-9017-4517-bbdb-47df8778c940", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -112508,63 +111686,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:47+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2026-01-19T14:45:13.352372+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } + "chat_template_config": {}, + "reasoning_config": {} }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "hf_slug": "zai-org/GLM-4.7-Flash", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "name": "Z.AI: GLM 4.7 Flash", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "z-ai/glm-4.7-flash-20260119", + "reasoning_config": {}, "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7 Flash", + "slug": "z-ai/glm-4.7-flash", + "updated_at": "2026-01-19T15:38:17.116015+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", + "model_variant_permaslug": "z-ai/glm-4.7-flash-20260119", + "model_variant_slug": "z-ai/glm-4.7-flash", "moderation_required": false, - "name": "Novita | qwen/qwen3-vl-235b-a22b-instruct", + "name": "Novita | z-ai/glm-4.7-flash-20260119", "pricing": { - "completion": "0.0000015", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000001", + "prompt": "0.00000007" }, "provider_display_name": "NovitaAI", "provider_info": { @@ -112613,14 +111780,14 @@ "slug": "novita", "statusPageUrl": "https://status.novita.ai/" }, - "provider_model_id": "qwen/qwen3-vl-235b-a22b-instruct", + "provider_model_id": "zai-org/glm-4.7-flash", "provider_name": "Novita", "provider_region": null, "provider_slug": "novita/bf16", "quantization": "bf16", "supported_parameters": [ - "structured_outputs", - "response_format", + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -112631,484 +111798,441 @@ "top_k", "repetition_penalty", "tools", - "tool_choice" + "tool_choice", + "response_format" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } + "chat_template_config": {}, + "reasoning_config": {} }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "hf_slug": "zai-org/GLM-4.7-Flash", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "name": "Z.AI: GLM 4.7 Flash", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "z-ai/glm-4.7-flash-20260119", + "reasoning_config": {}, "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7 Flash", + "slug": "z-ai/glm-4.7-flash", + "updated_at": "2026-01-19T15:38:17.116015+00:00", "warning_message": null - }, + } + ], + "name": "Novita", + "slug": "novita" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "datacenters": ["US"], + "displayName": "NVIDIA", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.nvidia.com/en-us/&size=256" + }, + "models": [ { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:50+00:00", + "author": "nvidia", + "context_length": 256000, + "created_at": "2025-12-14T16:54:35+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. Do not use for production or business-critical systems.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "NvidiaAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 256000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", - "training": false + "privacyPolicyURL": "https://www.nvidia.com/en-us/about-nvidia/privacy-policy/", + "retainsPrompts": true, + "termsOfServiceURL": "https://assets.ngc.nvidia.com/products/api-catalog/legal/NVIDIA%20API%20Trial%20Terms%20of%20Service.pdf", + "training": true }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, + "disable_free_endpoint_limits": false, + "reasoning_return_mechanism": "reasoning-content", + "supports_input_audio": false, + "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true + "literal_none": false, + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, - "has_completions": true, - "id": "c611a942-6a7c-43c7-b26e-d91cf0fb7b83", + "has_completions": false, + "id": "94f9fb8b-f775-405d-8c3a-6c918cb12dd8", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:50+00:00", + "author": "nvidia", + "context_length": 256000, + "created_at": "2025-12-14T16:54:35+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. Do not use for production or business-critical systems.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "hf_slug": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Thinking", + "name": "NVIDIA: Nemotron 3 Nano 30B A3B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-thinking", + "permaslug": "nvidia/nemotron-3-nano-30b-a3b", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 235B A22B Thinking", - "slug": "qwen/qwen3-vl-235b-a22b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Nemotron 3 Nano 30B A3B", + "slug": "nvidia/nemotron-3-nano-30b-a3b", + "updated_at": "2025-12-16T17:44:22.146099+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-thinking", - "model_variant_slug": "qwen/qwen3-vl-235b-a22b-thinking", + "model_variant_permaslug": "nvidia/nemotron-3-nano-30b-a3b:free", + "model_variant_slug": "nvidia/nemotron-3-nano-30b-a3b:free", "moderation_required": false, - "name": "Novita | qwen/qwen3-vl-235b-a22b-thinking", + "name": "Nvidia | nvidia/nemotron-3-nano-30b-a3b:free", "pricing": { - "completion": "0.00000395", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000098", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "NVIDIA", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", - "byokEnabled": true, + "adapterName": "NvidiaAdapter", + "baseUrl": "https://integrate.api.nvidia.com/v1", + "byokEnabled": false, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", + "privacyPolicyURL": "https://www.nvidia.com/en-us/about-nvidia/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "termsOfServiceURL": "https://assets.ngc.nvidia.com/products/api-catalog/legal/NVIDIA%20API%20Trial%20Terms%20of%20Service.pdf", "training": false }, - "displayName": "NovitaAI", - "editors": ["{}"], + "displayName": "NVIDIA", + "editors": [], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.nvidia.com/en-us/&size=256" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "name": "Nvidia", + "owners": [], + "slug": "nvidia", + "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-vl-235b-a22b-thinking", - "provider_name": "Novita", + "provider_model_id": "private/openrouter/nvidia/nemotron-3-nano-30b-a3b", + "provider_name": "Nvidia", "provider_region": null, - "provider_slug": "novita/bf16", + "provider_slug": "nvidia/bf16", "quantization": "bf16", "supported_parameters": [ "reasoning", "include_reasoning", - "max_tokens", "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", + "max_tokens", "seed", - "top_k", - "repetition_penalty" + "top_p", + "tool_choice", + "tools" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "hf_slug": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Thinking", + "name": "NVIDIA: Nemotron 3 Nano 30B A3B (free)", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-thinking", + "permaslug": "nvidia/nemotron-3-nano-30b-a3b", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 235B A22B Thinking", - "slug": "qwen/qwen3-vl-235b-a22b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Nemotron 3 Nano 30B A3B (free)", + "slug": "nvidia/nemotron-3-nano-30b-a3b", + "updated_at": "2025-12-16T17:44:22.146099+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-10-06T23:47:56.430294+00:00", + "author": "nvidia", + "context_length": 128000, + "created_at": "2025-10-28T18:19:25.723503+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", - "training": false + "privacyPolicyURL": "https://www.nvidia.com/en-us/about-nvidia/privacy-policy/", + "retainsPrompts": true, + "termsOfServiceURL": "https://assets.ngc.nvidia.com/products/api-catalog/legal/NVIDIA%20API%20Trial%20Terms%20of%20Service.pdf", + "training": true }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "disable_free_endpoint_limits": false, + "supports_base64_video_input": true, + "supports_input_audio": false, + "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": true }, "has_chat_completions": true, - "has_completions": true, - "id": "0ae33e05-f2b8-49bd-8a58-f61a835466bd", + "has_completions": false, + "id": "28304d1d-c2b9-4291-ba4d-dc63e798227e", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-06T23:47:56.430294+00:00", + "author": "nvidia", + "context_length": 128000, + "created_at": "2025-10-28T18:19:25.723503+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s memory-efficient sequence modeling for significantly higher throughput and lower latency.\n\nThe model supports inputs of text and multi-image documents, producing natural-language outputs. It is trained on high-quality NVIDIA-curated synthetic datasets optimized for optical-character recognition, chart reasoning, and multimodal comprehension.\n\nNemotron Nano 2 VL achieves leading results on OCRBench v2 and scores ≈ 74 average across MMMU, MathVista, AI2D, OCRBench, OCR-Reasoning, ChartQA, DocVQA, and Video-MME—surpassing prior open VL baselines. With Efficient Video Sampling (EVS), it handles long-form videos while reducing inference cost.\n\nOpen-weights, training data, and fine-tuning recipes are released under a permissive NVIDIA open license, with deployment supported across NeMo, NIM, and major inference runtimes.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "name": "NVIDIA: Nemotron Nano 12B 2 VL", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "permaslug": "nvidia/nemotron-nano-12b-v2-vl", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 30B A3B Instruct", - "slug": "qwen/qwen3-vl-30b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Nemotron Nano 12B 2 VL", + "slug": "nvidia/nemotron-nano-12b-v2-vl", + "updated_at": "2025-11-12T02:19:07.557675+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-instruct", - "model_variant_slug": "qwen/qwen3-vl-30b-a3b-instruct", + "model_variant_permaslug": "nvidia/nemotron-nano-12b-v2-vl:free", + "model_variant_slug": "nvidia/nemotron-nano-12b-v2-vl:free", "moderation_required": false, - "name": "Novita | qwen/qwen3-vl-30b-a3b-instruct", + "name": "Nvidia | nvidia/nemotron-nano-12b-v2-vl:free", "pricing": { - "completion": "0.0000007", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "NVIDIA", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", - "byokEnabled": true, + "adapterName": "OpenAIAdapter", + "baseUrl": "https://1afcd6e8-59bf-4102-95ed-7ec410f6959f.invocation.api.nvcf.nvidia.com/v1", + "byokEnabled": false, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", + "privacyPolicyURL": "https://www.nvidia.com/en-us/about-nvidia/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "termsOfServiceURL": "https://assets.ngc.nvidia.com/products/api-catalog/legal/NVIDIA%20API%20Trial%20Terms%20of%20Service.pdf", "training": false }, - "displayName": "NovitaAI", - "editors": ["{}"], + "displayName": "NVIDIA", + "editors": [], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.nvidia.com/en-us/&size=256" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "name": "Nvidia", + "owners": [], + "slug": "nvidia", + "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-vl-30b-a3b-instruct", - "provider_name": "Novita", + "provider_model_id": "nvidia/nvidia-nemotron-nano-12b-v2-vl", + "provider_name": "Nvidia", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "nvidia", + "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", - "max_tokens", + "reasoning", + "include_reasoning", "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", + "max_tokens", "seed", - "top_k", - "repetition_penalty", - "tools", - "tool_choice" + "top_p", + "tool_choice", + "tools" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL-BF16", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "name": "NVIDIA: Nemotron Nano 12B 2 VL (free)", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "permaslug": "nvidia/nemotron-nano-12b-v2-vl", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 30B A3B Instruct", - "slug": "qwen/qwen3-vl-30b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Nemotron Nano 12B 2 VL (free)", + "slug": "nvidia/nemotron-nano-12b-v2-vl", + "updated_at": "2025-11-12T02:19:07.557675+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-10-06T23:47:59.575824+00:00", - "default_parameters": { - "temperature": 0.8, - "top_p": 0.95 - }, + "author": "nvidia", + "context_length": 128000, + "created_at": "2025-09-05T21:13:27.486887+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", + "privacyPolicyURL": "https://www.nvidia.com/en-us/about-nvidia/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "termsOfServiceURL": "https://assets.ngc.nvidia.com/products/api-catalog/legal/NVIDIA%20API%20Trial%20Terms%20of%20Service.pdf", "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true }, + "supports_input_audio": false, + "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -113117,30 +112241,27 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "38ee90ad-31ec-4a90-925c-a95d9829636a", + "has_completions": false, + "id": "71549a70-5ef5-406b-ae54-fab8adfb6dae", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 50, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 0, - "created_at": "2025-10-06T23:47:59.575824+00:00", - "default_parameters": { - "temperature": 0.8, - "top_p": 0.95 - }, + "author": "nvidia", + "context_length": 32000, + "created_at": "2025-09-05T21:13:27.486887+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", "features": { "reasoning_config": { "end_token": "", @@ -113148,109 +112269,80 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Thinking", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Thinking", + "name": "NVIDIA: Nemotron Nano 9B V2", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-thinking", + "permaslug": "nvidia/nemotron-nano-9b-v2", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 30B A3B Thinking", - "slug": "qwen/qwen3-vl-30b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Nemotron Nano 9B V2", + "slug": "nvidia/nemotron-nano-9b-v2", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-thinking", - "model_variant_slug": "qwen/qwen3-vl-30b-a3b-thinking", + "model_variant_permaslug": "nvidia/nemotron-nano-9b-v2:free", + "model_variant_slug": "nvidia/nemotron-nano-9b-v2:free", "moderation_required": false, - "name": "Novita | qwen/qwen3-vl-30b-a3b-thinking", + "name": "Nvidia | nvidia/nemotron-nano-9b-v2:free", "pricing": { - "completion": "0.000001", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "NVIDIA", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", - "byokEnabled": true, + "adapterName": "OpenAIAdapter", + "baseUrl": "https://74819e7c-e3e6-4497-8fdb-5f5fdc17dc85.invocation.api.nvcf.nvidia.com/v1", + "byokEnabled": false, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", + "privacyPolicyURL": "https://www.nvidia.com/en-us/about-nvidia/privacy-policy/", "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "termsOfServiceURL": "https://assets.ngc.nvidia.com/products/api-catalog/legal/NVIDIA%20API%20Trial%20Terms%20of%20Service.pdf", "training": false }, - "displayName": "NovitaAI", - "editors": ["{}"], + "displayName": "NVIDIA", + "editors": [], "hasChatCompletions": true, - "hasCompletions": true, + "hasCompletions": false, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.nvidia.com/en-us/&size=256" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "name": "Nvidia", + "owners": [], + "slug": "nvidia", + "statusPageUrl": null }, - "provider_model_id": "qwen/qwen3-vl-30b-a3b-thinking", - "provider_name": "Novita", + "provider_model_id": "nvidia/nvidia-nemotron-nano-9b-v2", + "provider_name": "Nvidia", "provider_region": null, - "provider_slug": "novita/fp16", - "quantization": "fp16", + "provider_slug": "nvidia/bf16", + "quantization": "bf16", "supported_parameters": [ "reasoning", "include_reasoning", "structured_outputs", "response_format", - "max_tokens", "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", + "max_tokens", "seed", - "top_k", - "repetition_penalty", + "top_p", "tools", "tool_choice" ], @@ -113258,7 +112350,7 @@ "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { "reasoning_config": { @@ -113267,56 +112359,66 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Thinking", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Thinking", + "name": "NVIDIA: Nemotron Nano 9B V2 (free)", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-thinking", + "permaslug": "nvidia/nemotron-nano-9b-v2", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 30B A3B Thinking", - "slug": "qwen/qwen3-vl-30b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Nemotron Nano 9B V2 (free)", + "slug": "nvidia/nemotron-nano-9b-v2", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, + } + ], + "name": "Nvidia", + "slug": "nvidia" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "displayName": "OpenAI", + "headquarters": "US", + "icon": { + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" + }, + "models": [ { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-10-14T17:35:08.402158+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, + "author": "openai", + "context_length": 128000, + "created_at": "2024-08-14T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", + "description": "OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT. It therefore differs slightly from the API version of [GPT-4o](/models/openai/gpt-4o) in that it has additional RLHF. It is intended for research and evaluation.\n\nOpenAI notes that this model is not suited for production use-cases as it may be removed or redirected to another model in the future.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -113326,7 +112428,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "6bfd6b5b-ff8c-436f-ae3f-d28c02bcfce3", + "id": "aff4b825-af10-4633-9ab2-9ac68c547988", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -113335,188 +112437,145 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 256000, - "created_at": "2025-10-14T17:35:08.402158+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, + "author": "openai", + "context_length": 128000, + "created_at": "2024-08-14T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT. It therefore differs slightly from the API version of [GPT-4o](/models/openai/gpt-4o) in that it has additional RLHF. It is intended for research and evaluation.\n\nOpenAI notes that this model is not suited for production use-cases as it may be removed or redirected to another model in the future.", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 8B Instruct", + "name": "OpenAI: ChatGPT-4o", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-8b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "openai/chatgpt-4o-latest", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 VL 8B Instruct", - "slug": "qwen/qwen3-vl-8b-instruct", + "short_name": "ChatGPT-4o", + "slug": "openai/chatgpt-4o-latest", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-8b-instruct", - "model_variant_slug": "qwen/qwen3-vl-8b-instruct", - "moderation_required": false, - "name": "Novita | qwen/qwen3-vl-8b-instruct", + "model_variant_permaslug": "openai/chatgpt-4o-latest", + "model_variant_slug": "openai/chatgpt-4o-latest", + "moderation_required": true, + "name": "OpenAI | openai/chatgpt-4o-latest", "pricing": { - "completion": "0.0000005", + "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "prompt": "0.000005" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "qwen/qwen3-vl-8b-instruct", - "provider_name": "Novita", + "provider_model_id": "chatgpt-4o-latest", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty", - "tools", - "tool_choice" + "logit_bias", + "logprobs", + "top_logprobs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 8B Instruct", + "name": "OpenAI: ChatGPT-4o", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-8b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "openai/chatgpt-4o-latest", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 VL 8B Instruct", - "slug": "qwen/qwen3-vl-8b-instruct", + "short_name": "ChatGPT-4o", + "slug": "openai/chatgpt-4o-latest", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 32000, - "created_at": "2024-09-19T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "openai", + "context_length": 128000, + "created_at": "2026-01-19T22:42:49.327397+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "The gpt-audio model is OpenAI's first generally available audio model. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Audio is priced at $32 per million input tokens and $64 per million output tokens.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 32000, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -113526,7 +112585,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "a2a8a9fd-2784-448f-99b8-076fb6e1d8e5", + "id": "df1ccb5f-8dce-4d7e-8a34-5a6613b65995", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -113535,166 +112594,155 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2024-09-19T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "openai", + "context_length": 128000, + "created_at": "2026-01-19T22:42:49.327397+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", - "features": {}, - "group": "Qwen", + "description": "The gpt-audio model is OpenAI's first generally available audio model. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Audio is priced at $32 per million input tokens and $64 per million output tokens.", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-72B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", + "input_modalities": ["text", "audio"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen2.5 72B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-72b-instruct", - "reasoning_config": null, + "name": "OpenAI: GPT Audio", + "output_modalities": ["text", "audio"], + "permaslug": "openai/gpt-audio", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5 72B Instruct", - "slug": "qwen/qwen-2.5-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT Audio", + "slug": "openai/gpt-audio", + "updated_at": "2026-01-19T22:46:20.999934+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2.5-72b-instruct", - "model_variant_slug": "qwen/qwen-2.5-72b-instruct", - "moderation_required": false, - "name": "Novita | qwen/qwen-2.5-72b-instruct", + "model_variant_permaslug": "openai/gpt-audio", + "model_variant_slug": "openai/gpt-audio", + "moderation_required": true, + "name": "OpenAI | openai/gpt-audio", "pricing": { - "completion": "0.0000004", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000038", - "request": "0", - "web_search": "0" + "prompt": "0.0000025" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "qwen/qwen-2.5-72b-instruct", - "provider_name": "Novita", + "provider_model_id": "gpt-audio-2025-08-28", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty", - "response_format", - "structured_outputs", - "tools", - "tool_choice" + "logit_bias", + "logprobs", + "top_logprobs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Qwen", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-72B-Instruct", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", + "input_modalities": ["text", "audio"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen2.5 72B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-72b-instruct", - "reasoning_config": null, + "name": "OpenAI: GPT Audio", + "output_modalities": ["text", "audio"], + "permaslug": "openai/gpt-audio", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5 72B Instruct", - "slug": "qwen/qwen-2.5-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT Audio", + "slug": "openai/gpt-audio", + "updated_at": "2026-01-19T22:46:20.999934+00:00", "warning_message": null }, { - "author": "sao10k", - "context_length": 8192, - "created_at": "2024-08-13T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "openai", + "context_length": 128000, + "created_at": "2026-01-19T21:50:19.564599+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", + "description": "A cost-efficient version of GPT Audio. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Input is priced at $0.60 per million tokens and output is priced at $2.40 per million tokens.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -113704,7 +112752,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "d5d16ef3-adbc-4e45-8421-bc7d343e902f", + "id": "53417803-f19f-4905-af00-f7e023d09858", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -113713,114 +112761,101 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sao10k", - "context_length": 8192, - "created_at": "2024-08-13T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "openai", + "context_length": 128000, + "created_at": "2026-01-19T21:50:19.564599+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge.\n\nCreated by [Sao10k](https://huggingface.co/Sao10k), this model aims to offer an improved experience over Stheno v3.2, with enhanced creativity and logical reasoning.\n\nFor best results, use with Llama 3 Instruct context template, temperature 1.4, and min_p 0.1.", - "features": {}, - "group": "Llama3", + "description": "A cost-efficient version of GPT Audio. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Input is priced at $0.60 per million tokens and output is priced at $2.40 per million tokens.", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "Sao10K/L3-8B-Lunaris-v1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", + "input_modalities": ["text", "audio"], + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3 8B Lunaris", - "output_modalities": ["text"], - "permaslug": "sao10k/l3-lunaris-8b", - "reasoning_config": null, + "name": "OpenAI: GPT Audio Mini", + "output_modalities": ["text", "audio"], + "permaslug": "openai/gpt-audio-mini", + "reasoning_config": {}, "router": null, - "short_name": "Llama 3 8B Lunaris", - "slug": "sao10k/l3-lunaris-8b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT Audio Mini", + "slug": "openai/gpt-audio-mini", + "updated_at": "2026-01-19T22:39:40.831478+00:00", "warning_message": null }, - "model_variant_permaslug": "sao10k/l3-lunaris-8b", - "model_variant_slug": "sao10k/l3-lunaris-8b", - "moderation_required": false, - "name": "Novita | sao10k/l3-lunaris-8b", + "model_variant_permaslug": "openai/gpt-audio-mini", + "model_variant_slug": "openai/gpt-audio-mini", + "moderation_required": true, + "name": "OpenAI | openai/gpt-audio-mini", "pricing": { - "completion": "0.00000005", + "completion": "0.0000024", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "sao10k/l3-8b-lunaris", - "provider_name": "Novita", + "provider_model_id": "gpt-audio-mini-2025-12-15", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty" + "logit_bias", + "logprobs", + "top_logprobs" ], "supports_multipart": true, "supports_reasoning": false, @@ -113828,42 +112863,46 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "Sao10K/L3-8B-Lunaris-v1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", + "input_modalities": ["text", "audio"], + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3 8B Lunaris", - "output_modalities": ["text"], - "permaslug": "sao10k/l3-lunaris-8b", - "reasoning_config": null, + "name": "OpenAI: GPT Audio Mini", + "output_modalities": ["text", "audio"], + "permaslug": "openai/gpt-audio-mini", + "reasoning_config": {}, "router": null, - "short_name": "Llama 3 8B Lunaris", - "slug": "sao10k/l3-lunaris-8b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT Audio Mini", + "slug": "openai/gpt-audio-mini", + "updated_at": "2026-01-19T22:39:40.831478+00:00", "warning_message": null }, { - "author": "sao10k", - "context_length": 8192, - "created_at": "2024-06-18T00:00:00+00:00", + "author": "openai", + "context_length": 16385, + "created_at": "2023-05-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).\n\n- Better prompt adherence.\n- Better anatomy / spatial awareness.\n- Adapts much better to unique and custom formatting / reply formats.\n- Very creative, lots of unique swipes.\n- Is not restrictive during roleplays.", + "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 16385, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { @@ -113876,7 +112915,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "2ab67dc9-421b-408e-b61a-b4b86ea1df70", + "id": "3a632f37-731d-4200-9e38-413a5f5dd39d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -113885,112 +112924,94 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sao10k", - "context_length": 8192, - "created_at": "2024-06-18T00:00:00+00:00", + "author": "openai", + "context_length": 16385, + "created_at": "2023-05-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k).\n\n- Better prompt adherence.\n- Better anatomy / spatial awareness.\n- Adapts much better to unique and custom formatting / reply formats.\n- Very creative, lots of unique swipes.\n- Is not restrictive during roleplays.", + "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", "features": {}, - "group": "Llama3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Sao10K/L3-70B-Euryale-v2.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10k: Llama 3 Euryale 70B v2.1", + "name": "OpenAI: GPT-3.5 Turbo", "output_modalities": ["text"], - "permaslug": "sao10k/l3-euryale-70b", + "permaslug": "openai/gpt-3.5-turbo", "reasoning_config": null, "router": null, - "short_name": "Llama 3 Euryale 70B v2.1", - "slug": "sao10k/l3-euryale-70b", + "short_name": "GPT-3.5 Turbo", + "slug": "openai/gpt-3.5-turbo", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "sao10k/l3-euryale-70b", - "model_variant_slug": "sao10k/l3-euryale-70b", - "moderation_required": false, - "name": "Novita | sao10k/l3-euryale-70b", + "model_variant_permaslug": "openai/gpt-3.5-turbo", + "model_variant_slug": "openai/gpt-3.5-turbo", + "moderation_required": true, + "name": "OpenAI | openai/gpt-3.5-turbo", "pricing": { - "completion": "0.00000148", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000148", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "sao10k/l3-70b-euryale-v2.1", - "provider_name": "Novita", + "provider_model_id": "gpt-3.5-turbo", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", "tools", "tool_choice" ], @@ -114001,41 +113022,42 @@ "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Sao10K/L3-70B-Euryale-v2.1", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10k: Llama 3 Euryale 70B v2.1", + "name": "OpenAI: GPT-3.5 Turbo", "output_modalities": ["text"], - "permaslug": "sao10k/l3-euryale-70b", + "permaslug": "openai/gpt-3.5-turbo", "reasoning_config": null, "router": null, - "short_name": "Llama 3 Euryale 70B v2.1", - "slug": "sao10k/l3-euryale-70b", + "short_name": "GPT-3.5 Turbo", + "slug": "openai/gpt-3.5-turbo", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "sao10k", - "context_length": 8192, - "created_at": "2024-08-28T00:00:00+00:00", + "author": "openai", + "context_length": 16385, + "created_at": "2023-08-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", + "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up to Sep 2021.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 16385, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { @@ -114048,7 +113070,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9170f475-2eac-4b4d-950d-22be425df236", + "id": "eb1a93d0-a295-4afb-86d3-e2d10538c12d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -114057,112 +113079,94 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "sao10k", - "context_length": 131072, - "created_at": "2024-08-28T00:00:00+00:00", + "author": "openai", + "context_length": 16385, + "created_at": "2023-08-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).", + "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up to Sep 2021.", "features": {}, - "group": "Llama3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "name": "OpenAI: GPT-3.5 Turbo 16k", "output_modalities": ["text"], - "permaslug": "sao10k/l3.1-euryale-70b", + "permaslug": "openai/gpt-3.5-turbo-16k", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 Euryale 70B v2.2", - "slug": "sao10k/l3.1-euryale-70b", + "short_name": "GPT-3.5 Turbo 16k", + "slug": "openai/gpt-3.5-turbo-16k", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "sao10k/l3.1-euryale-70b", - "model_variant_slug": "sao10k/l3.1-euryale-70b", - "moderation_required": false, - "name": "Novita | sao10k/l3.1-euryale-70b", + "model_variant_permaslug": "openai/gpt-3.5-turbo-16k", + "model_variant_slug": "openai/gpt-3.5-turbo-16k", + "moderation_required": true, + "name": "OpenAI | openai/gpt-3.5-turbo-16k", "pricing": { - "completion": "0.00000148", + "completion": "0.000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000148", - "request": "0", - "web_search": "0" + "prompt": "0.000003" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "sao10k/l31-70b-euryale-v2.2", - "provider_name": "Novita", + "provider_model_id": "gpt-3.5-turbo-16k", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", "tools", "tool_choice" ], @@ -114173,45 +113177,45 @@ "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "GPT", "has_text_output": true, - "hf_slug": "Sao10K/L3.1-70B-Euryale-v2.2", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", + "instruct_type": null, "model_version_group_id": null, - "name": "Sao10K: Llama 3.1 Euryale 70B v2.2", + "name": "OpenAI: GPT-3.5 Turbo 16k", "output_modalities": ["text"], - "permaslug": "sao10k/l3.1-euryale-70b", + "permaslug": "openai/gpt-3.5-turbo-16k", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 Euryale 70B v2.2", - "slug": "sao10k/l3.1-euryale-70b", + "short_name": "GPT-3.5 Turbo 16k", + "slug": "openai/gpt-3.5-turbo-16k", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "thudm", - "context_length": 65536, - "created_at": "2025-07-11T14:33:05.378045+00:00", + "author": "openai", + "context_length": 4095, + "created_at": "2023-09-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "GLM-4.1V-9B-Thinking is a 9B parameter vision-language model developed by THUDM, based on the GLM-4-9B foundation. It introduces a reasoning-centric \"thinking paradigm\" enhanced with reinforcement learning to improve multimodal reasoning, long-context understanding (up to 64K tokens), and complex problem solving. It achieves state-of-the-art performance among models in its class, outperforming even larger models like Qwen-2.5-VL-72B on a majority of benchmark tasks. ", + "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 65536, + "context_length": 4095, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -114221,7 +113225,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "b5eeb1ab-c02f-4805-9706-30feb5bfd171", + "id": "39594b4d-6c7e-4ccd-aeed-79f9b3ca5819", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -114230,177 +113234,138 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "thudm", - "context_length": 65536, - "created_at": "2025-07-11T14:33:05.378045+00:00", + "author": "openai", + "context_length": 4095, + "created_at": "2023-09-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "GLM-4.1V-9B-Thinking is a 9B parameter vision-language model developed by THUDM, based on the GLM-4-9B foundation. It introduces a reasoning-centric \"thinking paradigm\" enhanced with reinforcement learning to improve multimodal reasoning, long-context understanding (up to 64K tokens), and complex problem solving. It achieves state-of-the-art performance among models in its class, outperforming even larger models like Qwen-2.5-VL-72B on a majority of benchmark tasks. ", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "THUDM/GLM-4.1V-9B-Thinking", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "chatml", "model_version_group_id": null, - "name": "THUDM: GLM 4.1V 9B Thinking", + "name": "OpenAI: GPT-3.5 Turbo Instruct", "output_modalities": ["text"], - "permaslug": "thudm/glm-4.1v-9b-thinking", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-3.5-turbo-instruct", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.1V 9B Thinking", - "slug": "thudm/glm-4.1v-9b-thinking", + "short_name": "GPT-3.5 Turbo Instruct", + "slug": "openai/gpt-3.5-turbo-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "thudm/glm-4.1v-9b-thinking", - "model_variant_slug": "thudm/glm-4.1v-9b-thinking", - "moderation_required": false, - "name": "Novita | thudm/glm-4.1v-9b-thinking", + "model_variant_permaslug": "openai/gpt-3.5-turbo-instruct", + "model_variant_slug": "openai/gpt-3.5-turbo-instruct", + "moderation_required": true, + "name": "OpenAI | openai/gpt-3.5-turbo-instruct", "pricing": { - "completion": "0.000000138", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000035", - "request": "0", - "web_search": "0" + "prompt": "0.0000015" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "thudm/glm-4.1v-9b-thinking", - "provider_name": "Novita", + "provider_model_id": "gpt-3.5-turbo-instruct", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty" + "logit_bias", + "logprobs", + "top_logprobs" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "THUDM/GLM-4.1V-9B-Thinking", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "chatml", "model_version_group_id": null, - "name": "THUDM: GLM 4.1V 9B Thinking", + "name": "OpenAI: GPT-3.5 Turbo Instruct", "output_modalities": ["text"], - "permaslug": "thudm/glm-4.1v-9b-thinking", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-3.5-turbo-instruct", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.1V 9B Thinking", - "slug": "thudm/glm-4.1v-9b-thinking", + "short_name": "GPT-3.5 Turbo Instruct", + "slug": "openai/gpt-3.5-turbo-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "microsoft", - "context_length": 65535, - "created_at": "2024-04-16T00:00:00+00:00", + "author": "openai", + "context_length": 8191, + "created_at": "2023-05-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["USER:", ""], + "default_stops": [], "default_system": null, - "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", + "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 65535, + "context_length": 8191, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { @@ -114413,7 +113378,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "92d0658f-b48a-4746-96e7-75ea71d5e060", + "id": "355b1df4-06c8-4c36-a091-3d50477095fb", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -114422,159 +113387,140 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8000, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "microsoft", - "context_length": 65536, - "created_at": "2024-04-16T00:00:00+00:00", + "author": "openai", + "context_length": 8191, + "created_at": "2023-05-28T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["USER:", ""], + "default_stops": [], "default_system": null, - "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", + "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.", "features": {}, - "group": "Mistral", + "group": "GPT", "has_text_output": true, - "hf_slug": "microsoft/WizardLM-2-8x22B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "vicuna", + "instruct_type": null, "model_version_group_id": null, - "name": "WizardLM-2 8x22B", + "name": "OpenAI: GPT-4", "output_modalities": ["text"], - "permaslug": "microsoft/wizardlm-2-8x22b", + "permaslug": "openai/gpt-4", "reasoning_config": null, "router": null, - "short_name": "WizardLM-2 8x22B", - "slug": "microsoft/wizardlm-2-8x22b", + "short_name": "GPT-4", + "slug": "openai/gpt-4", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "microsoft/wizardlm-2-8x22b", - "model_variant_slug": "microsoft/wizardlm-2-8x22b", - "moderation_required": false, - "name": "Novita | microsoft/wizardlm-2-8x22b", + "model_variant_permaslug": "openai/gpt-4", + "model_variant_slug": "openai/gpt-4", + "moderation_required": true, + "name": "OpenAI | openai/gpt-4", "pricing": { - "completion": "0.00000062", + "completion": "0.00006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000062", - "request": "0", - "web_search": "0" + "prompt": "0.00003" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "microsoft/wizardlm-2-8x22b", - "provider_name": "Novita", + "provider_model_id": "gpt-4", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty" + "logit_bias", + "logprobs", + "top_logprobs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Mistral", + "group": "GPT", "has_text_output": true, - "hf_slug": "microsoft/WizardLM-2-8x22B", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "vicuna", + "instruct_type": null, "model_version_group_id": null, - "name": "WizardLM-2 8x22B", + "name": "OpenAI: GPT-4", "output_modalities": ["text"], - "permaslug": "microsoft/wizardlm-2-8x22b", + "permaslug": "openai/gpt-4", "reasoning_config": null, "router": null, - "short_name": "WizardLM-2 8x22B", - "slug": "microsoft/wizardlm-2-8x22b", + "short_name": "GPT-4", + "slug": "openai/gpt-4", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-07-25T19:22:27.278283+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, + "author": "openai", + "context_length": 8191, + "created_at": "2023-05-28T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 8191, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { @@ -114587,7 +113533,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "e4ada6ec-12e7-49c9-837c-398dee612403", + "id": "69206fec-9f6f-4338-9919-5ed90134c376", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -114596,198 +113542,143 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 98304, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-07-25T19:22:27.278283+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, + "author": "openai", + "context_length": 8191, + "created_at": "2023-05-28T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5", + "name": "OpenAI: GPT-4 (older v0314)", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4-0314", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5", - "slug": "z-ai/glm-4.5", - "updated_at": "2026-01-05T22:04:10.598351+00:00", + "short_name": "GPT-4 (older v0314)", + "slug": "openai/gpt-4-0314", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.5", - "model_variant_slug": "z-ai/glm-4.5", - "moderation_required": false, - "name": "Novita | z-ai/glm-4.5", + "model_variant_permaslug": "openai/gpt-4-0314", + "model_variant_slug": "openai/gpt-4-0314", + "moderation_required": true, + "name": "OpenAI | openai/gpt-4-0314", "pricing": { - "completion": "0.0000022", + "completion": "0.00006", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000011", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00003" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "zai-org/glm-4.5", - "provider_name": "Novita", + "provider_model_id": "gpt-4-0314", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5", + "name": "OpenAI: GPT-4 (older v0314)", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4-0314", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5", - "slug": "z-ai/glm-4.5", - "updated_at": "2026-01-05T22:04:10.598351+00:00", + "short_name": "GPT-4 (older v0314)", + "slug": "openai/gpt-4-0314", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-07-25T19:20:58.066206+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, + "author": "openai", + "context_length": 128000, + "created_at": "2024-04-09T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { - "is_mandatory_reasoning": false, - "supported_parameters": { - "response_format": true, - "structured_outputs": false - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -114797,7 +113688,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "90fc9bb3-01bb-419f-bb58-dea60a80d201", + "id": "da16824f-3ba0-43a1-86f8-a6131837f457", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -114806,194 +113697,144 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 98304, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-07-25T19:20:58.066206+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, + "author": "openai", + "context_length": 128000, + "created_at": "2024-04-09T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5-Air", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5 Air", + "name": "OpenAI: GPT-4 Turbo", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5-air", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4-turbo", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5 Air", - "slug": "z-ai/glm-4.5-air", + "short_name": "GPT-4 Turbo", + "slug": "openai/gpt-4-turbo", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.5-air", - "model_variant_slug": "z-ai/glm-4.5-air", - "moderation_required": false, - "name": "Novita | z-ai/glm-4.5-air", + "model_variant_permaslug": "openai/gpt-4-turbo", + "model_variant_slug": "openai/gpt-4-turbo", + "moderation_required": true, + "name": "OpenAI | openai/gpt-4-turbo", "pricing": { - "completion": "0.00000085", + "completion": "0.00003", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000013", - "request": "0", - "web_search": "0" + "prompt": "0.00001" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "zai-org/glm-4.5-air", - "provider_name": "Novita", + "provider_model_id": "gpt-4-turbo", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty", + "logit_bias", + "logprobs", + "top_logprobs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5-Air", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5 Air", + "name": "OpenAI: GPT-4 Turbo", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5-air", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4-turbo", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5 Air", - "slug": "z-ai/glm-4.5-air", + "short_name": "GPT-4 Turbo", + "slug": "openai/gpt-4-turbo", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 65536, - "created_at": "2025-08-11T14:24:48.340676+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, + "author": "openai", + "context_length": 128000, + "created_at": "2023-11-06T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 65536, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": false, - "supports_multipart": true, + "supports_file_urls": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -115003,7 +113844,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "30f10717-f218-430f-a3d7-858bf5fdf303", + "id": "8744de26-f64f-41cf-bd0e-950a83d1a923", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -115012,190 +113853,140 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 65536, - "created_at": "2025-08-11T14:24:48.340676+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.75, - "top_p": null - }, + "author": "openai", + "context_length": 128000, + "created_at": "2023-11-06T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5V", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5V", + "name": "OpenAI: GPT-4 Turbo (older v1106)", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5v", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4-1106-preview", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5V", - "slug": "z-ai/glm-4.5v", + "short_name": "GPT-4 Turbo (older v1106)", + "slug": "openai/gpt-4-1106-preview", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.5v", - "model_variant_slug": "z-ai/glm-4.5v", - "moderation_required": false, - "name": "Novita | z-ai/glm-4.5v", + "model_variant_permaslug": "openai/gpt-4-1106-preview", + "model_variant_slug": "openai/gpt-4-1106-preview", + "moderation_required": true, + "name": "OpenAI | openai/gpt-4-1106-preview", "pricing": { - "completion": "0.0000018", + "completion": "0.00003", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000011", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00001" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIResponsesAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "zai-org/glm-4.5v", - "provider_name": "Novita", + "provider_model_id": "gpt-4-1106-preview", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty", - "response_format", - "structured_outputs", + "logit_bias", + "logprobs", + "top_logprobs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5V", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5V", + "name": "OpenAI: GPT-4 Turbo (older v1106)", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5v", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4-1106-preview", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.5V", - "slug": "z-ai/glm-4.5v", + "short_name": "GPT-4 Turbo (older v1106)", + "slug": "openai/gpt-4-1106-preview", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 204800, - "created_at": "2025-09-30T12:32:56.306946+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, + "author": "openai", + "context_length": 128000, + "created_at": "2024-01-25T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 204800, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { @@ -115208,7 +113999,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "562fe77f-c92f-4556-b43b-ea7da6891b6a", + "id": "003933da-395a-48eb-86a3-0c4ec486d67f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -115217,30 +114008,19 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 200000, - "created_at": "2025-09-30T12:32:56.306946+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, + "author": "openai", + "context_length": 128000, + "created_at": "2024-01-25T00:00:00+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.", + "features": {}, + "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, @@ -115248,119 +114028,85 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "OpenAI: GPT-4 Turbo Preview", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4-turbo-preview", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "GPT-4 Turbo Preview", + "slug": "openai/gpt-4-turbo-preview", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6", - "model_variant_slug": "z-ai/glm-4.6", - "moderation_required": false, - "name": "Novita | z-ai/glm-4.6", + "model_variant_permaslug": "openai/gpt-4-turbo-preview", + "model_variant_slug": "openai/gpt-4-turbo-preview", + "moderation_required": true, + "name": "OpenAI | openai/gpt-4-turbo-preview", "pricing": { - "completion": "0.0000022", + "completion": "0.00003", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000011", - "internal_reasoning": "0", - "prompt": "0.00000055", - "request": "0", - "web_search": "0" + "prompt": "0.00001" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "zai-org/glm-4.6", - "provider_name": "Novita", + "provider_model_id": "gpt-4-turbo-preview", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", + "seed", "max_tokens", + "response_format", + "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "seed", - "top_k", - "repetition_penalty", - "response_format", - "structured_outputs", + "logit_bias", + "logprobs", + "top_logprobs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, @@ -115368,55 +114114,49 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "OpenAI: GPT-4 Turbo Preview", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4-turbo-preview", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "GPT-4 Turbo Preview", + "slug": "openai/gpt-4-turbo-preview", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-12-08T15:24:22.464154+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.6 - }, + "author": "openai", + "context_length": 1047576, + "created_at": "2025-04-14T17:23:05+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", + "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 1047576, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { + "supported_parameters": {}, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, + "literal_none": true, "literal_required": true, "type_function": true - }, - "supports_video_urls": false + } }, "has_chat_completions": true, "has_completions": true, - "id": "a44ea9fe-fc2b-4765-804b-2e31215b5ca5", + "id": "c235abe8-11cc-42d3-95ad-72f4d198287a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -115429,202 +114169,163 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 131072, - "created_at": "2025-12-08T15:24:22.464154+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.6 - }, + "author": "openai", + "context_length": 1047576, + "created_at": "2025-04-14T17:23:05+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.6V", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "video"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6V", + "name": "OpenAI: GPT-4.1", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6-20251208", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4.1-2025-04-14", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.6V", - "slug": "z-ai/glm-4.6v", - "updated_at": "2025-12-08T15:45:24.970322+00:00", + "short_name": "GPT-4.1", + "slug": "openai/gpt-4.1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6-20251208", - "model_variant_slug": "z-ai/glm-4.6v", - "moderation_required": false, - "name": "Novita | z-ai/glm-4.6-20251208", + "model_variant_permaslug": "openai/gpt-4.1-2025-04-14", + "model_variant_slug": "openai/gpt-4.1", + "moderation_required": true, + "name": "OpenAI | openai/gpt-4.1-2025-04-14", "pricing": { - "completion": "0.0000009", + "completion": "0.000008", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000055", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "NovitaAI", + "input_cache_read": "0.0000005", + "prompt": "0.000002", + "web_search": "0.01" + }, + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIResponsesAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "zai-org/glm-4.6v", - "provider_name": "Novita", + "provider_model_id": "gpt-4.1-2025-04-14", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/bf16", - "quantization": "bf16", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", "seed", - "top_k", - "repetition_penalty", + "max_tokens", + "response_format", + "structured_outputs", "tools", "tool_choice", - "response_format", - "structured_outputs" + "temperature", + "top_p" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "request": "0.05", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.035", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.03", + "threshold": "low", + "type": "search-threshold" + } + ], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.6V", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "video"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6V", + "name": "OpenAI: GPT-4.1", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6-20251208", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4.1-2025-04-14", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.6V", - "slug": "z-ai/glm-4.6v", - "updated_at": "2025-12-08T15:45:24.970322+00:00", + "short_name": "GPT-4.1", + "slug": "openai/gpt-4.1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 204800, - "created_at": "2025-12-22T04:33:34.884504+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, + "author": "openai", + "context_length": 1047576, + "created_at": "2025-04-14T17:23:01+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.", "endpoint": { - "adapter_name": "NovitaAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 204800, + "context_length": 1047576, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": {}, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, - "literal_required": false, - "type_function": false + "literal_none": true, + "literal_required": true, + "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "53d155e7-021c-4268-bde4-991847ef8389", + "id": "872eccb7-9c85-45fc-974a-ff7c8e2407e6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -115633,216 +114334,146 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 200000, - "created_at": "2025-12-22T04:33:34.884504+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, + "author": "openai", + "context_length": 1047576, + "created_at": "2025-04-14T17:23:01+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "OpenAI: GPT-4.1 Mini", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4.1-mini-2025-04-14", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "GPT-4.1 Mini", + "slug": "openai/gpt-4.1-mini", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.7-20251222", - "model_variant_slug": "z-ai/glm-4.7", - "moderation_required": false, - "name": "Novita | z-ai/glm-4.7-20251222", + "model_variant_permaslug": "openai/gpt-4.1-mini-2025-04-14", + "model_variant_slug": "openai/gpt-4.1-mini", + "moderation_required": true, + "name": "OpenAI | openai/gpt-4.1-mini-2025-04-14", "pricing": { - "completion": "0.00000198", - "discount": 0.1, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000099", - "internal_reasoning": "0", - "prompt": "0.00000054", - "request": "0", - "web_search": "0" + "completion": "0.0000016", + "discount": 0, + "input_cache_read": "0.0000001", + "prompt": "0.0000004", + "web_search": "0.01" }, - "provider_display_name": "NovitaAI", + "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "NovitaAdapter", - "baseUrl": "https://api.novita.ai/v3/openai", + "adapterName": "OpenAIResponsesAdapter", + "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://novita.ai/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://novita.ai/legal/terms-of-service", + "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "NovitaAI", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://novita.ai/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/OpenAI.svg" }, - "ignoredProviderModels": [ - "google/gemma-3-1b-it", - "baichuan/baichuan-m2-32b", - "baidu/ernie-4.5-0.3b", - "qwen/qwen-mt-plus", - "qwen/qwen3-4b-fp8", - "meta-llama/llama-3.2-1b-instruct", - "sophosympatheia/midnight-rose-70b", - "deepseek/deepseek-prover-v2-671b", - "Sao10K/L3-8B-Stheno-v3.2", - "thudm/glm-4-32b-0414", - "qwen/qwen3-omni-30b-a3b-thinking", - "qwen/qwen3-omni-30b-a3b-instruct", - "paddlepaddle/paddleocr-vl", - "deepseek/deepseek-ocr", - "skywork/r1v4-lite", - "baidu/ernie-4.5-vl-28b-a3b-thinking", - "zai-org/autoglm-phone-9b-multilingual" - ], + "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": false, - "name": "Novita", - "owners": ["org_34P2zP0TCZwbzCC8QkH8m8o1i8M"], - "slug": "novita", - "statusPageUrl": "https://status.novita.ai/" + "moderationRequired": true, + "name": "OpenAI", + "owners": ["{}"], + "slug": "openai", + "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "zai-org/glm-4.7", - "provider_name": "Novita", + "provider_model_id": "gpt-4.1-mini-2025-04-14", + "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "novita/fp8", - "quantization": "fp8", + "provider_slug": "openai", + "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", "seed", - "top_k", - "repetition_penalty", + "max_tokens", + "response_format", + "structured_outputs", "tools", "tool_choice", - "response_format", - "structured_outputs" + "temperature", + "top_p" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "request": "0.03", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.0275", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.025", + "threshold": "low", + "type": "search-threshold" + } + ], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "GPT", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "OpenAI: GPT-4.1 Mini", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "openai/gpt-4.1-mini-2025-04-14", + "reasoning_config": null, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "GPT-4.1 Mini", + "slug": "openai/gpt-4.1-mini", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Novita", - "slug": "novita" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "datacenters": ["US"], - "displayName": "NVIDIA", - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.nvidia.com/en-us/&size=256" - }, - "models": [], - "name": "Nvidia", - "slug": "nvidia" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "displayName": "OpenAI", - "headquarters": "US", - "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" - }, - "models": [ + }, { "author": "openai", - "context_length": 128000, - "created_at": "2024-08-14T00:00:00+00:00", + "context_length": 1047576, + "created_at": "2025-04-14T17:22:49+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT. It therefore differs slightly from the API version of [GPT-4o](/models/openai/gpt-4o) in that it has additional RLHF. It is intended for research and evaluation.\n\nOpenAI notes that this model is not suited for production use-cases as it may be removed or redirected to another model in the future.", + "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 1047576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -115852,6 +114483,8 @@ "training": false }, "features": { + "supported_parameters": {}, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -115861,7 +114494,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "aff4b825-af10-4633-9ab2-9ac68c547988", + "id": "9251cee5-5503-4be9-9439-7ae21ff062a3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -115870,53 +114503,50 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2024-08-14T00:00:00+00:00", + "context_length": 1047576, + "created_at": "2025-04-14T17:22:49+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT. It therefore differs slightly from the API version of [GPT-4o](/models/openai/gpt-4o) in that it has additional RLHF. It is intended for research and evaluation.\n\nOpenAI notes that this model is not suited for production use-cases as it may be removed or redirected to another model in the future.", + "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.", "features": {}, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: ChatGPT-4o", + "name": "OpenAI: GPT-4.1 Nano", "output_modalities": ["text"], - "permaslug": "openai/chatgpt-4o-latest", + "permaslug": "openai/gpt-4.1-nano-2025-04-14", "reasoning_config": null, "router": null, - "short_name": "ChatGPT-4o", - "slug": "openai/chatgpt-4o-latest", + "short_name": "GPT-4.1 Nano", + "slug": "openai/gpt-4.1-nano", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/chatgpt-4o-latest", - "model_variant_slug": "openai/chatgpt-4o-latest", + "model_variant_permaslug": "openai/gpt-4.1-nano-2025-04-14", + "model_variant_slug": "openai/gpt-4.1-nano", "moderation_required": true, - "name": "OpenAI | openai/chatgpt-4o-latest", + "name": "OpenAI | openai/gpt-4.1-nano-2025-04-14", "pricing": { - "completion": "0.000015", + "completion": "0.0000004", "discount": 0, - "image": "0.007225", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000025", + "prompt": "0.0000001", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -115945,7 +114575,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "chatgpt-4o-latest", + "provider_model_id": "gpt-4.1-nano-2025-04-14", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -115955,18 +114585,14 @@ "max_tokens", "response_format", "structured_outputs", + "tools", + "tool_choice", "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "logit_bias", - "logprobs", - "top_logprobs" + "top_p" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, @@ -115976,31 +114602,31 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: ChatGPT-4o", + "name": "OpenAI: GPT-4.1 Nano", "output_modalities": ["text"], - "permaslug": "openai/chatgpt-4o-latest", + "permaslug": "openai/gpt-4.1-nano-2025-04-14", "reasoning_config": null, "router": null, - "short_name": "ChatGPT-4o", - "slug": "openai/chatgpt-4o-latest", + "short_name": "GPT-4.1 Nano", + "slug": "openai/gpt-4.1-nano", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 200000, - "created_at": "2025-05-16T15:36:01.081688+00:00", + "context_length": 128000, + "created_at": "2024-05-13T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "codex-mini-latest is a fine-tuned version of o4-mini specifically for use in Codex CLI. For direct use in the API, we recommend starting with gpt-4.1.", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -116010,10 +114636,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -116023,7 +114645,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "421864ed-5bce-4ae5-b02e-7345b5878842", + "id": "3d6584e7-a2bb-48d6-903d-24e3d90e7e55", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -116032,54 +114654,48 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 200000, - "created_at": "2025-05-16T15:36:01.081688+00:00", + "context_length": 128000, + "created_at": "2024-05-13T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "codex-mini-latest is a fine-tuned version of o4-mini specifically for use in Codex CLI. For direct use in the API, we recommend starting with gpt-4.1.", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "features": {}, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: Codex Mini", + "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", + "name": "OpenAI: GPT-4o (2024-05-13)", "output_modalities": ["text"], - "permaslug": "openai/codex-mini", + "permaslug": "openai/gpt-4o-2024-05-13", "reasoning_config": null, "router": null, - "short_name": "Codex Mini", - "slug": "openai/codex-mini", + "short_name": "GPT-4o (2024-05-13)", + "slug": "openai/gpt-4o-2024-05-13", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/codex-mini", - "model_variant_slug": "openai/codex-mini", + "model_variant_permaslug": "openai/gpt-4o-2024-05-13", + "model_variant_slug": "openai/gpt-4o-2024-05-13", "moderation_required": true, - "name": "OpenAI | openai/codex-mini", + "name": "OpenAI | openai/gpt-4o-2024-05-13", "pricing": { - "completion": "0.000006", + "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000375", - "internal_reasoning": "0", - "prompt": "0.0000015", - "request": "0", - "web_search": "0" + "prompt": "0.000005" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", + "adapterName": "OpenAIAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -116108,25 +114724,48 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "codex-mini-latest", + "provider_model_id": "gpt-4o-2024-05-13", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "seed", "max_tokens", + "response_format", + "structured_outputs", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "web_search_options", + "logit_bias", + "logprobs", + "top_logprobs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "request": "0.05", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.035", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.03", + "threshold": "low", + "type": "search-threshold" + } + ], "variant": "standard" }, "features": {}, @@ -116135,31 +114774,31 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: Codex Mini", + "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", + "name": "OpenAI: GPT-4o (2024-05-13)", "output_modalities": ["text"], - "permaslug": "openai/codex-mini", + "permaslug": "openai/gpt-4o-2024-05-13", "reasoning_config": null, "router": null, - "short_name": "Codex Mini", - "slug": "openai/codex-mini", + "short_name": "GPT-4o (2024-05-13)", + "slug": "openai/gpt-4o-2024-05-13", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 16385, - "created_at": "2023-05-28T00:00:00+00:00", + "context_length": 128000, + "created_at": "2024-08-06T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", + "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 16385, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -116178,7 +114817,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3a632f37-731d-4200-9e38-413a5f5dd39d", + "id": "9d15935a-34e6-4a5e-a5bc-c7dda213e876", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -116187,49 +114826,45 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 16385, - "created_at": "2023-05-28T00:00:00+00:00", + "context_length": 128000, + "created_at": "2024-08-06T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.", + "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)", "features": {}, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: GPT-3.5 Turbo", + "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", + "name": "OpenAI: GPT-4o (2024-08-06)", "output_modalities": ["text"], - "permaslug": "openai/gpt-3.5-turbo", + "permaslug": "openai/gpt-4o-2024-08-06", "reasoning_config": null, "router": null, - "short_name": "GPT-3.5 Turbo", - "slug": "openai/gpt-3.5-turbo", + "short_name": "GPT-4o (2024-08-06)", + "slug": "openai/gpt-4o-2024-08-06", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-3.5-turbo", - "model_variant_slug": "openai/gpt-3.5-turbo", + "model_variant_permaslug": "openai/gpt-4o-2024-08-06", + "model_variant_slug": "openai/gpt-4o-2024-08-06", "moderation_required": true, - "name": "OpenAI | openai/gpt-3.5-turbo", + "name": "OpenAI | openai/gpt-4o-2024-08-06", "pricing": { - "completion": "0.0000015", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000125", + "prompt": "0.0000025" }, "provider_display_name": "OpenAI", "provider_info": { @@ -116262,7 +114897,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-3.5-turbo", + "provider_model_id": "gpt-4o-2024-08-06", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -116277,6 +114912,7 @@ "stop", "frequency_penalty", "presence_penalty", + "web_search_options", "logit_bias", "logprobs", "top_logprobs", @@ -116286,7 +114922,23 @@ "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "request": "0.05", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.035", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.03", + "threshold": "low", + "type": "search-threshold" + } + ], "variant": "standard" }, "features": {}, @@ -116295,31 +114947,31 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: GPT-3.5 Turbo", + "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", + "name": "OpenAI: GPT-4o (2024-08-06)", "output_modalities": ["text"], - "permaslug": "openai/gpt-3.5-turbo", + "permaslug": "openai/gpt-4o-2024-08-06", "reasoning_config": null, "router": null, - "short_name": "GPT-3.5 Turbo", - "slug": "openai/gpt-3.5-turbo", + "short_name": "GPT-4o (2024-08-06)", + "slug": "openai/gpt-4o-2024-08-06", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 16385, - "created_at": "2023-08-28T00:00:00+00:00", + "context_length": 128000, + "created_at": "2024-11-20T18:33:14.771895+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up to Sep 2021.", + "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. It’s also better at working with uploaded files, providing deeper insights & more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 16385, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -116338,7 +114990,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "eb1a93d0-a295-4afb-86d3-e2d10538c12d", + "id": "3e86b7c5-bffe-4b60-a3dd-b36451978775", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -116347,49 +114999,45 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 16385, - "created_at": "2023-08-28T00:00:00+00:00", + "context_length": 128000, + "created_at": "2024-11-20T18:33:14.771895+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up to Sep 2021.", + "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. It’s also better at working with uploaded files, providing deeper insights & more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.", "features": {}, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: GPT-3.5 Turbo 16k", + "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", + "name": "OpenAI: GPT-4o (2024-11-20)", "output_modalities": ["text"], - "permaslug": "openai/gpt-3.5-turbo-16k", + "permaslug": "openai/gpt-4o-2024-11-20", "reasoning_config": null, "router": null, - "short_name": "GPT-3.5 Turbo 16k", - "slug": "openai/gpt-3.5-turbo-16k", + "short_name": "GPT-4o (2024-11-20)", + "slug": "openai/gpt-4o-2024-11-20", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-3.5-turbo-16k", - "model_variant_slug": "openai/gpt-3.5-turbo-16k", + "model_variant_permaslug": "openai/gpt-4o-2024-11-20", + "model_variant_slug": "openai/gpt-4o-2024-11-20", "moderation_required": true, - "name": "OpenAI | openai/gpt-3.5-turbo-16k", + "name": "OpenAI | openai/gpt-4o-2024-11-20", "pricing": { - "completion": "0.000004", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000125", + "prompt": "0.0000025" }, "provider_display_name": "OpenAI", "provider_info": { @@ -116422,7 +115070,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-3.5-turbo-16k", + "provider_model_id": "gpt-4o-2024-11-20", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -116437,6 +115085,7 @@ "stop", "frequency_penalty", "presence_penalty", + "web_search_options", "logit_bias", "logprobs", "top_logprobs", @@ -116446,7 +115095,23 @@ "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "request": "0.05", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.035", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.03", + "threshold": "low", + "type": "search-threshold" + } + ], "variant": "standard" }, "features": {}, @@ -116455,31 +115120,31 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: GPT-3.5 Turbo 16k", + "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", + "name": "OpenAI: GPT-4o (2024-11-20)", "output_modalities": ["text"], - "permaslug": "openai/gpt-3.5-turbo-16k", + "permaslug": "openai/gpt-4o-2024-11-20", "reasoning_config": null, "router": null, - "short_name": "GPT-3.5 Turbo 16k", - "slug": "openai/gpt-3.5-turbo-16k", + "short_name": "GPT-4o (2024-11-20)", + "slug": "openai/gpt-4o-2024-11-20", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 4095, - "created_at": "2023-09-28T00:00:00+00:00", + "context_length": 128000, + "created_at": "2024-05-13T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 4095, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -116498,7 +115163,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "39594b4d-6c7e-4ccd-aeed-79f9b3ca5819", + "id": "3f4c883a-bd8b-4e01-ac1b-25cc9a17dd61", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -116507,49 +115172,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": 64000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 4095, - "created_at": "2023-09-28T00:00:00+00:00", + "context_length": 128000, + "created_at": "2024-05-13T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.", + "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", "features": {}, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", - "model_version_group_id": null, - "name": "OpenAI: GPT-3.5 Turbo Instruct", + "input_modalities": ["text", "image", "file"], + "instruct_type": null, + "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", + "name": "OpenAI: GPT-4o", "output_modalities": ["text"], - "permaslug": "openai/gpt-3.5-turbo-instruct", + "permaslug": "openai/gpt-4o", "reasoning_config": null, "router": null, - "short_name": "GPT-3.5 Turbo Instruct", - "slug": "openai/gpt-3.5-turbo-instruct", + "short_name": "GPT-4o", + "slug": "openai/gpt-4o", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-3.5-turbo-instruct", - "model_variant_slug": "openai/gpt-3.5-turbo-instruct", + "model_variant_permaslug": "openai/gpt-4o:extended", + "model_variant_slug": "openai/gpt-4o:extended", "moderation_required": true, - "name": "OpenAI | openai/gpt-3.5-turbo-instruct", + "name": "OpenAI | openai/gpt-4o:extended", "pricing": { - "completion": "0.000002", + "completion": "0.000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000015", - "request": "0", - "web_search": "0" + "prompt": "0.000006" }, "provider_display_name": "OpenAI", "provider_info": { @@ -116582,7 +115242,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-3.5-turbo-instruct", + "provider_model_id": "gpt-4o-64k-output-alpha", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -116597,15 +115257,34 @@ "stop", "frequency_penalty", "presence_penalty", + "web_search_options", "logit_bias", "logprobs", - "top_logprobs" + "top_logprobs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, - "variable_pricings": [], - "variant": "standard" + "supports_tool_parameters": true, + "variable_pricings": [ + { + "request": "0.05", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.035", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.03", + "threshold": "low", + "type": "search-threshold" + } + ], + "variant": "extended" }, "features": {}, "group": "GPT", @@ -116613,31 +115292,35 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", - "model_version_group_id": null, - "name": "OpenAI: GPT-3.5 Turbo Instruct", + "input_modalities": ["text", "image", "file"], + "instruct_type": null, + "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", + "name": "OpenAI: GPT-4o (extended)", "output_modalities": ["text"], - "permaslug": "openai/gpt-3.5-turbo-instruct", + "permaslug": "openai/gpt-4o", "reasoning_config": null, "router": null, - "short_name": "GPT-3.5 Turbo Instruct", - "slug": "openai/gpt-3.5-turbo-instruct", + "short_name": "GPT-4o (extended)", + "slug": "openai/gpt-4o", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 8191, - "created_at": "2023-05-28T00:00:00+00:00", - "default_parameters": {}, + "context_length": 128000, + "created_at": "2025-08-15T04:44:21.353523+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.", + "description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs are currently not supported. Audio tokens are priced at $40 per million input and $80 per million output audio tokens.", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 8191, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -116647,6 +115330,8 @@ "training": false }, "features": { + "supported_parameters": {}, + "supports_input_audio": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -116656,7 +115341,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "355b1df4-06c8-4c36-a091-3d50477095fb", + "id": "d93227c0-2904-486b-a79e-e94419fce096", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -116665,53 +115350,63 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 8191, - "created_at": "2023-05-28T00:00:00+00:00", - "default_parameters": {}, + "context_length": 128000, + "created_at": "2025-08-15T04:44:21.353523+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning capabilities. Training data: up to Sep 2021.", - "features": {}, + "description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs are currently not supported. Audio tokens are priced at $40 per million input and $80 per million output audio tokens.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["audio", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4", - "output_modalities": ["text"], - "permaslug": "openai/gpt-4", - "reasoning_config": null, + "name": "OpenAI: GPT-4o Audio", + "output_modalities": ["text", "audio"], + "permaslug": "openai/gpt-4o-audio-preview", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4", - "slug": "openai/gpt-4", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-4o Audio", + "slug": "openai/gpt-4o-audio-preview", + "updated_at": "2026-01-19T19:38:46.595105+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4", - "model_variant_slug": "openai/gpt-4", + "model_variant_permaslug": "openai/gpt-4o-audio-preview", + "model_variant_slug": "openai/gpt-4o-audio-preview", "moderation_required": true, - "name": "OpenAI | openai/gpt-4", + "name": "OpenAI | openai/gpt-4o-audio-preview", "pricing": { - "completion": "0.00006", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00003", - "request": "0", - "web_search": "0" + "prompt": "0.0000025" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -116740,7 +115435,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4", + "provider_model_id": "gpt-4o-audio-preview-2025-06-03", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -116767,37 +115462,48 @@ "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["audio", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4", - "output_modalities": ["text"], - "permaslug": "openai/gpt-4", - "reasoning_config": null, + "name": "OpenAI: GPT-4o Audio", + "output_modalities": ["text", "audio"], + "permaslug": "openai/gpt-4o-audio-preview", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4", - "slug": "openai/gpt-4", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-4o Audio", + "slug": "openai/gpt-4o-audio-preview", + "updated_at": "2026-01-19T19:38:46.595105+00:00", "warning_message": null }, { "author": "openai", - "context_length": 8191, - "created_at": "2023-05-28T00:00:00+00:00", + "context_length": 128000, + "created_at": "2025-03-12T22:19:09.996816+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.", + "description": "GPT-4o Search Previewis a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 8191, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -116816,7 +115522,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "69206fec-9f6f-4338-9919-5ed90134c376", + "id": "f37536d3-fa09-47a3-b63c-831a1965253e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -116825,17 +115531,17 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 8191, - "created_at": "2023-05-28T00:00:00+00:00", + "context_length": 128000, + "created_at": "2025-03-12T22:19:09.996816+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.", + "description": "GPT-4o Search Previewis a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", "features": {}, "group": "GPT", "has_text_output": true, @@ -116845,33 +115551,29 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4 (older v0314)", + "name": "OpenAI: GPT-4o Search Preview", "output_modalities": ["text"], - "permaslug": "openai/gpt-4-0314", + "permaslug": "openai/gpt-4o-search-preview-2025-03-11", "reasoning_config": null, "router": null, - "short_name": "GPT-4 (older v0314)", - "slug": "openai/gpt-4-0314", + "short_name": "GPT-4o Search Preview", + "slug": "openai/gpt-4o-search-preview", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4-0314", - "model_variant_slug": "openai/gpt-4-0314", + "model_variant_permaslug": "openai/gpt-4o-search-preview-2025-03-11", + "model_variant_slug": "openai/gpt-4o-search-preview", "moderation_required": true, - "name": "OpenAI | openai/gpt-4-0314", + "name": "OpenAI | openai/gpt-4o-search-preview-2025-03-11", "pricing": { - "completion": "0.00006", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00003", - "request": "0", - "web_search": "0" + "prompt": "0.0000025", + "web_search": "0.035" }, - "provider_display_name": "OpenAI", + "provider_display_name": "OpenAIAdapter", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -116882,7 +115584,7 @@ "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "OpenAI", + "displayName": "OpenAIAdapter", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, @@ -116900,31 +115602,37 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4-0314", + "provider_model_id": "gpt-4o-search-preview-2025-03-11", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "seed", + "web_search_options", "max_tokens", "response_format", - "structured_outputs", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "tools", - "tool_choice" + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], + "supports_tool_parameters": false, + "variable_pricings": [ + { + "request": "0.05", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.035", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.03", + "threshold": "low", + "type": "search-threshold" + } + ], "variant": "standard" }, "features": {}, @@ -116936,24 +115644,24 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4 (older v0314)", + "name": "OpenAI: GPT-4o Search Preview", "output_modalities": ["text"], - "permaslug": "openai/gpt-4-0314", + "permaslug": "openai/gpt-4o-search-preview-2025-03-11", "reasoning_config": null, "router": null, - "short_name": "GPT-4 (older v0314)", - "slug": "openai/gpt-4-0314", + "short_name": "GPT-4o Search Preview", + "slug": "openai/gpt-4o-search-preview", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", "context_length": 128000, - "created_at": "2024-04-09T00:00:00+00:00", + "created_at": "2024-07-18T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": true, @@ -116976,7 +115684,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "da16824f-3ba0-43a1-86f8-a6131837f457", + "id": "77e40332-6f2a-4c48-bc14-e44596b30ce2", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -116985,49 +115693,45 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", "context_length": 128000, - "created_at": "2024-04-09T00:00:00+00:00", + "created_at": "2024-07-18T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "features": {}, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4 Turbo", + "name": "OpenAI: GPT-4o-mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-4-turbo", + "permaslug": "openai/gpt-4o-mini", "reasoning_config": null, "router": null, - "short_name": "GPT-4 Turbo", - "slug": "openai/gpt-4-turbo", + "short_name": "GPT-4o-mini", + "slug": "openai/gpt-4o-mini", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4-turbo", - "model_variant_slug": "openai/gpt-4-turbo", + "model_variant_permaslug": "openai/gpt-4o-mini", + "model_variant_slug": "openai/gpt-4o-mini", "moderation_required": true, - "name": "OpenAI | openai/gpt-4-turbo", + "name": "OpenAI | openai/gpt-4o-mini", "pricing": { - "completion": "0.00003", + "completion": "0.0000006", "discount": 0, - "image": "0.01445", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000075", + "prompt": "0.00000015" }, "provider_display_name": "OpenAI", "provider_info": { @@ -117060,7 +115764,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4-turbo", + "provider_model_id": "gpt-4o-mini", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -117075,6 +115779,7 @@ "stop", "frequency_penalty", "presence_penalty", + "web_search_options", "logit_bias", "logprobs", "top_logprobs", @@ -117084,38 +115789,54 @@ "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": {}, + "variable_pricings": [ + { + "request": "0.03", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.0275", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.025", + "threshold": "low", + "type": "search-threshold" + } + ], + "variant": "standard" + }, + "features": {}, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4 Turbo", + "name": "OpenAI: GPT-4o-mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-4-turbo", + "permaslug": "openai/gpt-4o-mini", "reasoning_config": null, "router": null, - "short_name": "GPT-4 Turbo", - "slug": "openai/gpt-4-turbo", + "short_name": "GPT-4o-mini", + "slug": "openai/gpt-4o-mini", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", "context_length": 128000, - "created_at": "2023-11-06T00:00:00+00:00", + "created_at": "2024-07-18T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, "context_length": 128000, "data_policy": { @@ -117127,7 +115848,6 @@ "training": false }, "features": { - "supports_file_urls": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -117137,7 +115857,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8744de26-f64f-41cf-bd0e-950a83d1a923", + "id": "ebcc1f0a-6621-4cdc-a93f-88a6e2cc2e15", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -117146,53 +115866,49 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", "context_length": 128000, - "created_at": "2023-11-06T00:00:00+00:00", + "created_at": "2024-07-18T00:00:00+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.", + "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", "features": {}, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4 Turbo (older v1106)", + "name": "OpenAI: GPT-4o-mini (2024-07-18)", "output_modalities": ["text"], - "permaslug": "openai/gpt-4-1106-preview", + "permaslug": "openai/gpt-4o-mini-2024-07-18", "reasoning_config": null, "router": null, - "short_name": "GPT-4 Turbo (older v1106)", - "slug": "openai/gpt-4-1106-preview", + "short_name": "GPT-4o-mini (2024-07-18)", + "slug": "openai/gpt-4o-mini-2024-07-18", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4-1106-preview", - "model_variant_slug": "openai/gpt-4-1106-preview", + "model_variant_permaslug": "openai/gpt-4o-mini-2024-07-18", + "model_variant_slug": "openai/gpt-4o-mini-2024-07-18", "moderation_required": true, - "name": "OpenAI | openai/gpt-4-1106-preview", + "name": "OpenAI | openai/gpt-4o-mini-2024-07-18", "pricing": { - "completion": "0.00003", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000075", + "prompt": "0.00000015" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", + "adapterName": "OpenAIAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -117221,7 +115937,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4-1106-preview", + "provider_model_id": "gpt-4o-mini-2024-07-18", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -117236,6 +115952,7 @@ "stop", "frequency_penalty", "presence_penalty", + "web_search_options", "logit_bias", "logprobs", "top_logprobs", @@ -117245,7 +115962,23 @@ "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [], + "variable_pricings": [ + { + "request": "0.03", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.0275", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.025", + "threshold": "low", + "type": "search-threshold" + } + ], "variant": "standard" }, "features": {}, @@ -117254,27 +115987,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4 Turbo (older v1106)", + "name": "OpenAI: GPT-4o-mini (2024-07-18)", "output_modalities": ["text"], - "permaslug": "openai/gpt-4-1106-preview", + "permaslug": "openai/gpt-4o-mini-2024-07-18", "reasoning_config": null, "router": null, - "short_name": "GPT-4 Turbo (older v1106)", - "slug": "openai/gpt-4-1106-preview", + "short_name": "GPT-4o-mini (2024-07-18)", + "slug": "openai/gpt-4o-mini-2024-07-18", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", "context_length": 128000, - "created_at": "2024-01-25T00:00:00+00:00", + "created_at": "2025-03-12T22:22:02.718344+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.", + "description": "GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": true, @@ -117297,7 +116030,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "003933da-395a-48eb-86a3-0c4ec486d67f", + "id": "5154b382-e458-4539-bf6d-cbadfbaa0600", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -117306,17 +116039,17 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", "context_length": 128000, - "created_at": "2024-01-25T00:00:00+00:00", + "created_at": "2025-03-12T22:22:02.718344+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023.\n\n**Note:** heavily rate limited by OpenAI while in preview.", + "description": "GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", "features": {}, "group": "GPT", "has_text_output": true, @@ -117326,29 +116059,25 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4 Turbo Preview", + "name": "OpenAI: GPT-4o-mini Search Preview", "output_modalities": ["text"], - "permaslug": "openai/gpt-4-turbo-preview", + "permaslug": "openai/gpt-4o-mini-search-preview-2025-03-11", "reasoning_config": null, "router": null, - "short_name": "GPT-4 Turbo Preview", - "slug": "openai/gpt-4-turbo-preview", + "short_name": "GPT-4o-mini Search Preview", + "slug": "openai/gpt-4o-mini-search-preview", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4-turbo-preview", - "model_variant_slug": "openai/gpt-4-turbo-preview", + "model_variant_permaslug": "openai/gpt-4o-mini-search-preview-2025-03-11", + "model_variant_slug": "openai/gpt-4o-mini-search-preview", "moderation_required": true, - "name": "OpenAI | openai/gpt-4-turbo-preview", + "name": "OpenAI | openai/gpt-4o-mini-search-preview-2025-03-11", "pricing": { - "completion": "0.00003", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00001", - "request": "0", - "web_search": "0" + "prompt": "0.00000015", + "web_search": "0.0275" }, "provider_display_name": "OpenAI", "provider_info": { @@ -117381,31 +116110,37 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4-turbo-preview", + "provider_model_id": "gpt-4o-mini-search-preview-2025-03-11", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "seed", + "web_search_options", "max_tokens", "response_format", - "structured_outputs", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "logit_bias", - "logprobs", - "top_logprobs", - "tools", - "tool_choice" + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], + "supports_tool_parameters": false, + "variable_pricings": [ + { + "request": "0.03", + "threshold": "high", + "type": "search-threshold" + }, + { + "request": "0.0275", + "threshold": "medium", + "type": "search-threshold" + }, + { + "request": "0.025", + "threshold": "low", + "type": "search-threshold" + } + ], "variant": "standard" }, "features": {}, @@ -117417,28 +116152,32 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4 Turbo Preview", + "name": "OpenAI: GPT-4o-mini Search Preview", "output_modalities": ["text"], - "permaslug": "openai/gpt-4-turbo-preview", + "permaslug": "openai/gpt-4o-mini-search-preview-2025-03-11", "reasoning_config": null, "router": null, - "short_name": "GPT-4 Turbo Preview", - "slug": "openai/gpt-4-turbo-preview", + "short_name": "GPT-4o-mini Search Preview", + "slug": "openai/gpt-4o-mini-search-preview", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 1047576, - "created_at": "2025-04-14T17:23:05+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-08-07T17:23:33+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.", + "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 1047576, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -117448,7 +116187,14 @@ "training": false }, "features": { - "supported_parameters": {}, + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": false, "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, @@ -117459,7 +116205,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c235abe8-11cc-42d3-95ad-72f4d198287a", + "id": "7c2f859a-7890-4e8e-b1de-1cd1c0a800b4", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -117468,49 +116214,62 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 1047576, - "created_at": "2025-04-14T17:23:05+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-08-07T17:23:33+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and GPT-4.5 across coding (54.6% SWE-bench Verified), instruction compliance (87.4% IFEval), and multimodal understanding benchmarks. It is tuned for precise code diffs, agent reliability, and high recall in large document contexts, making it ideal for agents, IDE tooling, and enterprise knowledge retrieval.", - "features": {}, + "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4.1", + "name": "OpenAI: GPT-5", "output_modalities": ["text"], - "permaslug": "openai/gpt-4.1-2025-04-14", - "reasoning_config": null, + "permaslug": "openai/gpt-5-2025-08-07", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4.1", - "slug": "openai/gpt-4.1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5", + "slug": "openai/gpt-5", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4.1-2025-04-14", - "model_variant_slug": "openai/gpt-4.1", + "model_variant_permaslug": "openai/gpt-5-2025-08-07", + "model_variant_slug": "openai/gpt-5", "moderation_required": true, - "name": "OpenAI | openai/gpt-4.1-2025-04-14", + "name": "OpenAI | openai/gpt-5-2025-08-07", "pricing": { - "completion": "0.000008", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000005", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", + "input_cache_read": "0.000000125", + "prompt": "0.00000125", "web_search": "0.01" }, "provider_display_name": "OpenAI", @@ -117541,77 +116300,74 @@ "moderationRequired": true, "name": "OpenAI", "owners": ["{}"], - "slug": "openai", + "slug": "openai/default", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4.1-2025-04-14", + "provider_model_id": "gpt-5-2025-08-07", "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "openai", + "provider_slug": "openai/default", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", "tools", - "tool_choice", - "temperature", - "top_p" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "request": "0.05", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.035", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.03", - "threshold": "low", - "type": "search-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4.1", + "name": "OpenAI: GPT-5", "output_modalities": ["text"], - "permaslug": "openai/gpt-4.1-2025-04-14", - "reasoning_config": null, + "permaslug": "openai/gpt-5-2025-08-07", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4.1", - "slug": "openai/gpt-4.1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5", + "slug": "openai/gpt-5", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", - "context_length": 1047576, - "created_at": "2025-04-14T17:23:01+00:00", + "context_length": 128000, + "created_at": "2025-08-07T17:30:37.42514+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.", + "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 1047576, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -117621,7 +116377,13 @@ "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": false, "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, @@ -117632,7 +116394,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "872eccb7-9c85-45fc-974a-ff7c8e2407e6", + "id": "8f8398f5-523f-4676-8ba4-e82e3484efb1", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -117641,49 +116403,55 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 1047576, - "created_at": "2025-04-14T17:23:01+00:00", + "context_length": 128000, + "created_at": "2025-08-07T17:30:37.42514+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard instruction evals, 35.8% on MultiChallenge, and 84.1% on IFEval. Mini also shows strong coding ability (e.g., 31.6% on Aider’s polyglot diff benchmark) and vision understanding, making it suitable for interactive applications with tight performance constraints.", - "features": {}, + "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4.1 Mini", + "name": "OpenAI: GPT-5 Chat", "output_modalities": ["text"], - "permaslug": "openai/gpt-4.1-mini-2025-04-14", - "reasoning_config": null, + "permaslug": "openai/gpt-5-chat-2025-08-07", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4.1 Mini", - "slug": "openai/gpt-4.1-mini", + "short_name": "GPT-5 Chat", + "slug": "openai/gpt-5-chat", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4.1-mini-2025-04-14", - "model_variant_slug": "openai/gpt-4.1-mini", + "model_variant_permaslug": "openai/gpt-5-chat-2025-08-07", + "model_variant_slug": "openai/gpt-5-chat", "moderation_required": true, - "name": "OpenAI | openai/gpt-4.1-mini-2025-04-14", + "name": "OpenAI | openai/gpt-5-chat-2025-08-07", "pricing": { - "completion": "0.0000016", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000001", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", + "input_cache_read": "0.000000125", + "prompt": "0.00000125", "web_search": "0.01" }, "provider_display_name": "OpenAI", @@ -117717,74 +116485,63 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4.1-mini-2025-04-14", + "provider_model_id": "gpt-5-chat-latest", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", - "supported_parameters": [ - "seed", - "max_tokens", - "response_format", - "structured_outputs", - "tools", - "tool_choice", - "temperature", - "top_p" - ], + "supported_parameters": ["structured_outputs", "response_format", "seed", "max_tokens"], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [ - { - "request": "0.03", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.0275", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.025", - "threshold": "low", - "type": "search-threshold" - } - ], + "supports_tool_parameters": false, + "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4.1 Mini", + "name": "OpenAI: GPT-5 Chat", "output_modalities": ["text"], - "permaslug": "openai/gpt-4.1-mini-2025-04-14", - "reasoning_config": null, + "permaslug": "openai/gpt-5-chat-2025-08-07", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4.1 Mini", - "slug": "openai/gpt-4.1-mini", + "short_name": "GPT-5 Chat", + "slug": "openai/gpt-5-chat", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 1047576, - "created_at": "2025-04-14T17:22:49+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-09-23T16:03:23.098042+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.", + "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 1047576, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -117794,8 +116551,13 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_native_web_search": true, + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_implicit_caching": true, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -117805,7 +116567,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9251cee5-5503-4be9-9439-7ae21ff062a3", + "id": "f10a63bc-2bcd-4726-9e75-1e482efd080c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -117814,50 +116576,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 1047576, - "created_at": "2025-04-14T17:22:49+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-09-23T16:03:23.098042+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million token context window, and scores 80.1% on MMLU, 50.3% on GPQA, and 9.8% on Aider polyglot coding – even higher than GPT‑4o mini. It’s ideal for tasks like classification or autocompletion.", - "features": {}, + "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4.1 Nano", + "name": "OpenAI: GPT-5 Codex", "output_modalities": ["text"], - "permaslug": "openai/gpt-4.1-nano-2025-04-14", - "reasoning_config": null, + "permaslug": "openai/gpt-5-codex", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4.1 Nano", - "slug": "openai/gpt-4.1-nano", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Codex", + "slug": "openai/gpt-5-codex", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4.1-nano-2025-04-14", - "model_variant_slug": "openai/gpt-4.1-nano", + "model_variant_permaslug": "openai/gpt-5-codex", + "model_variant_slug": "openai/gpt-5-codex", "moderation_required": true, - "name": "OpenAI | openai/gpt-4.1-nano-2025-04-14", + "name": "OpenAI | openai/gpt-5-codex", "pricing": { - "completion": "0.0000004", + "completion": "0.00001", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000025", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0.01" + "input_cache_read": "0.000000125", + "prompt": "0.00000125" }, "provider_display_name": "OpenAI", "provider_info": { @@ -117890,58 +116661,72 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4.1-nano-2025-04-14", + "provider_model_id": "gpt-5-codex", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", "tools", - "tool_choice", - "temperature", - "top_p" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4.1 Nano", + "name": "OpenAI: GPT-5 Codex", "output_modalities": ["text"], - "permaslug": "openai/gpt-4.1-nano-2025-04-14", - "reasoning_config": null, + "permaslug": "openai/gpt-5-codex", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4.1 Nano", - "slug": "openai/gpt-4.1-nano", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Codex", + "slug": "openai/gpt-5-codex", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2024-05-13T00:00:00+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-10-14T13:19:46.029021+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", + "description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's GPT-5 model with state-of-the-art image generation capabilities. It offers major improvements in reasoning, code quality, and user experience while incorporating GPT Image 1's superior instruction following, text rendering, and detailed image editing.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -117951,6 +116736,15 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": false, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -117960,7 +116754,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3d6584e7-a2bb-48d6-903d-24e3d90e7e55", + "id": "be0ed145-8bfc-4aec-a62d-685ed334fe17", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -117969,53 +116763,68 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2024-05-13T00:00:00+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-10-14T13:19:46.029021+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", - "features": {}, + "description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's GPT-5 model with state-of-the-art image generation capabilities. It offers major improvements in reasoning, code quality, and user experience while incorporating GPT Image 1's superior instruction following, text rendering, and detailed image editing.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, - "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", - "name": "OpenAI: GPT-4o (2024-05-13)", - "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-2024-05-13", - "reasoning_config": null, + "model_version_group_id": null, + "name": "OpenAI: GPT-5 Image", + "output_modalities": ["image", "text"], + "permaslug": "openai/gpt-5-image", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o (2024-05-13)", - "slug": "openai/gpt-4o-2024-05-13", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Image", + "slug": "openai/gpt-5-image", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4o-2024-05-13", - "model_variant_slug": "openai/gpt-4o-2024-05-13", + "model_variant_permaslug": "openai/gpt-5-image", + "model_variant_slug": "openai/gpt-5-image", "moderation_required": true, - "name": "OpenAI | openai/gpt-4o-2024-05-13", + "name": "OpenAI | openai/gpt-5-image", "pricing": { - "completion": "0.000015", + "completion": "0.00001", "discount": 0, - "image": "0.007225", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000005", - "request": "0", - "web_search": "0" + "image_output": "0.00004", + "input_cache_read": "0.00000125", + "prompt": "0.00001", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -118044,22 +116853,23 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4o-2024-05-13", + "provider_model_id": "gpt-5-2025-08-07", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "web_search_options", "logit_bias", "logprobs", "top_logprobs", @@ -118067,58 +116877,59 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "request": "0.05", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.035", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.03", - "threshold": "low", - "type": "search-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, - "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", - "name": "OpenAI: GPT-4o (2024-05-13)", - "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-2024-05-13", - "reasoning_config": null, + "model_version_group_id": null, + "name": "OpenAI: GPT-5 Image", + "output_modalities": ["image", "text"], + "permaslug": "openai/gpt-5-image", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o (2024-05-13)", - "slug": "openai/gpt-4o-2024-05-13", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Image", + "slug": "openai/gpt-5-image", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2024-08-06T00:00:00+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-10-16T14:23:03.143259+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)", + "description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation. This natively multimodal model features superior instruction following, text rendering, and detailed image editing with reduced latency and cost. It excels at high-quality visual creation while maintaining strong text understanding, making it ideal for applications that require both efficient image generation and text processing at scale.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -118128,6 +116939,16 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": false, + "supports_multipart": true, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -118137,7 +116958,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9d15935a-34e6-4a5e-a5bc-c7dda213e876", + "id": "7c09094a-64ec-4d53-bd69-c165ac31c465", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -118146,54 +116967,65 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2024-08-06T00:00:00+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-10-16T14:23:03.143259+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/).\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)", - "features": {}, + "description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation. This natively multimodal model features superior instruction following, text rendering, and detailed image editing with reduced latency and cost. It excels at high-quality visual creation while maintaining strong text understanding, making it ideal for applications that require both efficient image generation and text processing at scale.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, - "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", - "name": "OpenAI: GPT-4o (2024-08-06)", - "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-2024-08-06", - "reasoning_config": null, + "model_version_group_id": null, + "name": "OpenAI: GPT-5 Image Mini", + "output_modalities": ["image", "text"], + "permaslug": "openai/gpt-5-image-mini", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o (2024-08-06)", - "slug": "openai/gpt-4o-2024-08-06", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Image Mini", + "slug": "openai/gpt-5-image-mini", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4o-2024-08-06", - "model_variant_slug": "openai/gpt-4o-2024-08-06", + "model_variant_permaslug": "openai/gpt-5-image-mini", + "model_variant_slug": "openai/gpt-5-image-mini", "moderation_required": true, - "name": "OpenAI | openai/gpt-4o-2024-08-06", + "name": "OpenAI | openai/gpt-5-image-mini", "pricing": { - "completion": "0.00001", + "completion": "0.000002", "discount": 0, - "image": "0.003613", - "image_output": "0", - "input_cache_read": "0.00000125", - "internal_reasoning": "0", + "image_output": "0.000008", + "input_cache_read": "0.00000025", "prompt": "0.0000025", - "request": "0", - "web_search": "0" + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -118222,22 +117054,23 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4o-2024-08-06", + "provider_model_id": "gpt-5-mini-2025-08-07", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", "temperature", "top_p", "stop", "frequency_penalty", "presence_penalty", - "web_search_options", "logit_bias", "logprobs", "top_logprobs", @@ -118245,58 +117078,52 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "request": "0.05", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.035", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.03", - "threshold": "low", - "type": "search-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, - "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", - "name": "OpenAI: GPT-4o (2024-08-06)", - "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-2024-08-06", - "reasoning_config": null, + "model_version_group_id": null, + "name": "OpenAI: GPT-5 Image Mini", + "output_modalities": ["image", "text"], + "permaslug": "openai/gpt-5-image-mini", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o (2024-08-06)", - "slug": "openai/gpt-4o-2024-08-06", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Image Mini", + "slug": "openai/gpt-5-image-mini", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2024-11-20T18:33:14.771895+00:00", + "context_length": 400000, + "created_at": "2025-08-07T17:23:27+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. It’s also better at working with uploaded files, providing deeper insights & more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.", + "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -118306,6 +117133,15 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": false, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -118315,7 +117151,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3e86b7c5-bffe-4b60-a3dd-b36451978775", + "id": "c4f66d01-20b0-4c27-a225-438ea22fda43", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -118324,18 +117160,24 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2024-11-20T18:33:14.771895+00:00", + "context_length": 400000, + "created_at": "2025-08-07T17:23:27+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance & readability. It’s also better at working with uploaded files, providing deeper insights & more thorough responses.\n\nGPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.", - "features": {}, + "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, @@ -118343,35 +117185,35 @@ "hidden": false, "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", - "name": "OpenAI: GPT-4o (2024-11-20)", + "model_version_group_id": null, + "name": "OpenAI: GPT-5 Mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-2024-11-20", - "reasoning_config": null, + "permaslug": "openai/gpt-5-mini-2025-08-07", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o (2024-11-20)", - "slug": "openai/gpt-4o-2024-11-20", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Mini", + "slug": "openai/gpt-5-mini", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4o-2024-11-20", - "model_variant_slug": "openai/gpt-4o-2024-11-20", + "model_variant_permaslug": "openai/gpt-5-mini-2025-08-07", + "model_variant_slug": "openai/gpt-5-mini", "moderation_required": true, - "name": "OpenAI | openai/gpt-4o-2024-11-20", + "name": "OpenAI | openai/gpt-5-mini-2025-08-07", "pricing": { - "completion": "0.00001", + "completion": "0.000002", "discount": 0, - "image": "0.003613", - "image_output": "0", - "input_cache_read": "0.00000125", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000025", + "prompt": "0.00000025", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -118400,51 +117242,34 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4o-2024-11-20", + "provider_model_id": "gpt-5-mini-2025-08-07", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "web_search_options", - "logit_bias", - "logprobs", - "top_logprobs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "request": "0.05", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.035", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.03", - "threshold": "low", - "type": "search-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, @@ -118452,29 +117277,33 @@ "hidden": false, "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", - "name": "OpenAI: GPT-4o (2024-11-20)", + "model_version_group_id": null, + "name": "OpenAI: GPT-5 Mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-2024-11-20", - "reasoning_config": null, + "permaslug": "openai/gpt-5-mini-2025-08-07", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o (2024-11-20)", - "slug": "openai/gpt-4o-2024-11-20", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Mini", + "slug": "openai/gpt-5-mini", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2024-05-13T00:00:00+00:00", + "context_length": 400000, + "created_at": "2025-08-07T17:23:22+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", + "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -118484,6 +117313,15 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": false, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -118493,7 +117331,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3f4c883a-bd8b-4e01-ac1b-25cc9a17dd61", + "id": "50329d77-04e1-4979-a184-c33030289476", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -118502,18 +117340,24 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 64000, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2024-05-13T00:00:00+00:00", + "context_length": 400000, + "created_at": "2025-08-07T17:23:22+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as fast and 50% more cost-effective. GPT-4o also offers improved performance in processing non-English languages and enhanced visual capabilities.\n\nFor benchmarking against other models, it was briefly called [\"im-also-a-good-gpt2-chatbot\"](https://twitter.com/LiamFedus/status/1790064963966370209)\n\n#multimodal", - "features": {}, + "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, @@ -118521,34 +117365,35 @@ "hidden": false, "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", - "name": "OpenAI: GPT-4o", + "model_version_group_id": null, + "name": "OpenAI: GPT-5 Nano", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o", - "reasoning_config": null, + "permaslug": "openai/gpt-5-nano-2025-08-07", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o", - "slug": "openai/gpt-4o", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Nano", + "slug": "openai/gpt-5-nano", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4o:extended", - "model_variant_slug": "openai/gpt-4o:extended", + "model_variant_permaslug": "openai/gpt-5-nano-2025-08-07", + "model_variant_slug": "openai/gpt-5-nano", "moderation_required": true, - "name": "OpenAI | openai/gpt-4o:extended", + "name": "OpenAI | openai/gpt-5-nano-2025-08-07", "pricing": { - "completion": "0.000018", + "completion": "0.0000004", "discount": 0, - "image": "0.007225", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000005", + "prompt": "0.00000005", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -118577,51 +117422,34 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4o-64k-output-alpha", + "provider_model_id": "gpt-5-nano-2025-08-07", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "web_search_options", - "logit_bias", - "logprobs", - "top_logprobs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "request": "0.05", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.035", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.03", - "threshold": "low", - "type": "search-threshold" - } - ], - "variant": "extended" + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } }, - "features": {}, "group": "GPT", "has_text_output": true, "hf_slug": null, @@ -118629,29 +117457,37 @@ "hidden": false, "input_modalities": ["text", "image", "file"], "instruct_type": null, - "model_version_group_id": "76e36b33-358e-477a-be24-09f954fcea74", - "name": "OpenAI: GPT-4o (extended)", + "model_version_group_id": null, + "name": "OpenAI: GPT-5 Nano", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o", - "reasoning_config": null, + "permaslug": "openai/gpt-5-nano-2025-08-07", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o (extended)", - "slug": "openai/gpt-4o", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Nano", + "slug": "openai/gpt-5-nano", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2025-08-15T04:44:21.353523+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-10-06T18:51:03.215373+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs are currently not supported. Audio tokens are priced at $40 per million input audio tokens.", + "description": "GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -118661,8 +117497,14 @@ "training": false }, "features": { - "supported_parameters": {}, - "supports_input_audio": true, + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, + "supports_input_audio": false, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -118672,7 +117514,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "d93227c0-2904-486b-a79e-e94419fce096", + "id": "86e3e664-d291-415d-a769-8e08b96a79e9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -118681,18 +117523,25 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2025-08-15T04:44:21.353523+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-10-06T18:51:03.215373+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs are currently not supported. Audio tokens are priced at $40 per million input audio tokens.", + "description": "GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, @@ -118704,36 +117553,32 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["audio", "text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o Audio", + "name": "OpenAI: GPT-5 Pro", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-audio-preview", + "permaslug": "openai/gpt-5-pro-2025-10-06", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-4o Audio", - "slug": "openai/gpt-4o-audio-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Pro", + "slug": "openai/gpt-5-pro", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4o-audio-preview", - "model_variant_slug": "openai/gpt-4o-audio-preview", + "model_variant_permaslug": "openai/gpt-5-pro-2025-10-06", + "model_variant_slug": "openai/gpt-5-pro", "moderation_required": true, - "name": "OpenAI | openai/gpt-4o-audio-preview", + "name": "OpenAI | openai/gpt-5-pro-2025-10-06", "pricing": { - "completion": "0.00001", + "completion": "0.00012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0", - "web_search": "0" + "prompt": "0.000015", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { @@ -118766,34 +117611,31 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4o-audio-preview-2025-06-03", + "provider_model_id": "gpt-5-pro-2025-10-06", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "logit_bias", - "logprobs", - "top_logprobs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, @@ -118805,35 +117647,39 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["audio", "text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o Audio", + "name": "OpenAI: GPT-5 Pro", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-audio-preview", + "permaslug": "openai/gpt-5-pro-2025-10-06", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-4o Audio", - "slug": "openai/gpt-4o-audio-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5 Pro", + "slug": "openai/gpt-5-pro", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2025-03-12T22:19:09.996816+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-11-13T18:58:25+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o Search Previewis a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", + "description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\n\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -118843,6 +117689,15 @@ "training": false }, "features": { + "is_mandatory_reasoning": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": false, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -118852,7 +117707,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f37536d3-fa09-47a3-b63c-831a1965253e", + "id": "764eb97f-8bab-4326-b29b-7a8799b00a70", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -118861,51 +117716,63 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2025-03-12T22:19:09.996816+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-11-13T18:58:25+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o Search Previewis a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", - "features": {}, + "description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\n\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o Search Preview", + "name": "OpenAI: GPT-5.1", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-search-preview-2025-03-11", - "reasoning_config": null, + "permaslug": "openai/gpt-5.1-20251113", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o Search Preview", - "slug": "openai/gpt-4o-search-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.1", + "slug": "openai/gpt-5.1", + "updated_at": "2025-11-13T18:58:25.56227+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4o-search-preview-2025-03-11", - "model_variant_slug": "openai/gpt-4o-search-preview", + "model_variant_permaslug": "openai/gpt-5.1-20251113", + "model_variant_slug": "openai/gpt-5.1", "moderation_required": true, - "name": "OpenAI | openai/gpt-4o-search-preview-2025-03-11", + "name": "OpenAI | openai/gpt-5.1-20251113", "pricing": { "completion": "0.00001", "discount": 0, - "image": "0.003613", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0.035", - "web_search": "0" + "input_cache_read": "0.000000125", + "prompt": "0.00000125", + "web_search": "0.01" }, - "provider_display_name": "OpenAIAdapter", + "provider_display_name": "OpenAI", "provider_info": { "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", @@ -118918,7 +117785,7 @@ "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", "training": false }, - "displayName": "OpenAIAdapter", + "displayName": "OpenAI", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, @@ -118933,71 +117800,74 @@ "moderationRequired": true, "name": "OpenAI", "owners": ["{}"], - "slug": "openai", + "slug": "openai/default", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4o-search-preview-2025-03-11", + "provider_model_id": "gpt-5.1-2025-11-13", "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "openai", + "provider_slug": "openai/default", "quantization": "unknown", "supported_parameters": [ - "web_search_options", - "max_tokens", + "reasoning", + "include_reasoning", + "structured_outputs", "response_format", - "structured_outputs" + "seed", + "max_tokens", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, - "variable_pricings": [ - { - "request": "0.05", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.035", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.03", - "threshold": "low", - "type": "search-threshold" - } - ], + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o Search Preview", + "name": "OpenAI: GPT-5.1", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-search-preview-2025-03-11", - "reasoning_config": null, + "permaslug": "openai/gpt-5.1-20251113", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o Search Preview", - "slug": "openai/gpt-4o-search-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.1", + "slug": "openai/gpt-5.1", + "updated_at": "2025-11-13T18:58:25.56227+00:00", "warning_message": null }, { "author": "openai", "context_length": 128000, - "created_at": "2024-07-18T00:00:00+00:00", - "default_parameters": {}, + "created_at": "2025-11-13T18:58:22+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", + "description": "GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.1 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.\n", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, "context_length": 128000, "data_policy": { @@ -119009,6 +117879,14 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, + "supports_implicit_caching": true, + "supports_input_audio": false, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -119018,7 +117896,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "77e40332-6f2a-4c48-bc14-e44596b30ce2", + "id": "f27c561c-0804-4e51-a96e-18bc1968212d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -119033,48 +117911,59 @@ "model": { "author": "openai", "context_length": 128000, - "created_at": "2024-07-18T00:00:00+00:00", - "default_parameters": {}, + "created_at": "2025-11-13T18:58:22+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", - "features": {}, + "description": "GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.1 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.\n", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o-mini", + "name": "OpenAI: GPT-5.1 Chat", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-mini", - "reasoning_config": null, + "permaslug": "openai/gpt-5.1-chat-20251113", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o-mini", - "slug": "openai/gpt-4o-mini", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.1 Chat", + "slug": "openai/gpt-5.1-chat", + "updated_at": "2025-11-13T18:58:22.624591+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4o-mini", - "model_variant_slug": "openai/gpt-4o-mini", + "model_variant_permaslug": "openai/gpt-5.1-chat-20251113", + "model_variant_slug": "openai/gpt-5.1-chat", "moderation_required": true, - "name": "OpenAI | openai/gpt-4o-mini", + "name": "OpenAI | openai/gpt-5.1-chat-20251113", "pricing": { - "completion": "0.0000006", + "completion": "0.00001", "discount": 0, - "image": "0.000217", - "image_output": "0", - "input_cache_read": "0.000000075", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000125", + "prompt": "0.00000125", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -119103,81 +117992,71 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4o-mini", + "provider_model_id": "gpt-5.1-chat-latest", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "web_search_options", - "logit_bias", - "logprobs", - "top_logprobs", - "tools", - "tool_choice" + "tool_choice", + "tools" ], "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": true, - "variable_pricings": [ - { - "request": "0.03", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.0275", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.025", - "threshold": "low", - "type": "search-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o-mini", + "name": "OpenAI: GPT-5.1 Chat", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-mini", - "reasoning_config": null, + "permaslug": "openai/gpt-5.1-chat-20251113", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o-mini", - "slug": "openai/gpt-4o-mini", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.1 Chat", + "slug": "openai/gpt-5.1-chat", + "updated_at": "2025-11-13T18:58:22.624591+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2024-07-18T00:00:00+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-11-13T18:58:18+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", + "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -119187,6 +118066,13 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_implicit_caching": true, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -119196,7 +118082,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "ebcc1f0a-6621-4cdc-a93f-88a6e2cc2e15", + "id": "58caabab-f2a1-4a27-b098-b46b924efd27", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -119205,54 +118091,64 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2024-07-18T00:00:00+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-11-13T18:58:18+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs.\n\nAs their most advanced small model, it is many multiples more affordable than other recent frontier models, and more than 60% cheaper than [GPT-3.5 Turbo](/models/openai/gpt-3.5-turbo). It maintains SOTA intelligence, while being significantly more cost-effective.\n\nGPT-4o mini achieves an 82% score on MMLU and presently ranks higher than GPT-4 on chat preferences [common leaderboards](https://arena.lmsys.org/).\n\nCheck out the [launch announcement](https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/) to learn more.\n\n#multimodal", - "features": {}, + "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o-mini (2024-07-18)", + "name": "OpenAI: GPT-5.1-Codex", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-mini-2024-07-18", - "reasoning_config": null, + "permaslug": "openai/gpt-5.1-codex-20251113", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o-mini (2024-07-18)", - "slug": "openai/gpt-4o-mini-2024-07-18", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.1-Codex", + "slug": "openai/gpt-5.1-codex", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4o-mini-2024-07-18", - "model_variant_slug": "openai/gpt-4o-mini-2024-07-18", + "model_variant_permaslug": "openai/gpt-5.1-codex-20251113", + "model_variant_slug": "openai/gpt-5.1-codex", "moderation_required": true, - "name": "OpenAI | openai/gpt-4o-mini-2024-07-18", + "name": "OpenAI | openai/gpt-5.1-codex-20251113", "pricing": { - "completion": "0.0000006", + "completion": "0.00001", "discount": 0, - "image": "0.007225", - "image_output": "0", - "input_cache_read": "0.000000075", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000125", + "prompt": "0.00000125" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -119281,81 +118177,73 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4o-mini-2024-07-18", + "provider_model_id": "gpt-5.1-codex", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "web_search_options", - "logit_bias", - "logprobs", - "top_logprobs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, - "variable_pricings": [ - { - "request": "0.03", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.0275", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.025", - "threshold": "low", - "type": "search-threshold" - } - ], + "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o-mini (2024-07-18)", + "name": "OpenAI: GPT-5.1-Codex", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-mini-2024-07-18", - "reasoning_config": null, + "permaslug": "openai/gpt-5.1-codex-20251113", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o-mini (2024-07-18)", - "slug": "openai/gpt-4o-mini-2024-07-18", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.1-Codex", + "slug": "openai/gpt-5.1-codex", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2025-03-12T22:22:02.718344+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-12-04T20:08:54.10013+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", + "description": "GPT-5.1-Codex-Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research. \nGPT-5.1-Codex-Max delivers faster performance, improved reasoning, and higher token efficiency across the development lifecycle. ", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -119365,6 +118253,10 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supports_implicit_caching": true, + "supports_input_audio": false, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -119374,7 +118266,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5154b382-e458-4539-bf6d-cbadfbaa0600", + "id": "f225ad30-4cb3-4e28-b677-0eff326af277", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -119383,53 +118275,65 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2025-03-12T22:22:02.718344+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-12-04T20:08:54.10013+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.", - "features": {}, + "description": "GPT-5.1-Codex-Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research. \nGPT-5.1-Codex-Max delivers faster performance, improved reasoning, and higher token efficiency across the development lifecycle. ", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o-mini Search Preview", + "name": "OpenAI: GPT-5.1-Codex-Max", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-mini-search-preview-2025-03-11", - "reasoning_config": null, + "permaslug": "openai/gpt-5.1-codex-max-20251204", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o-mini Search Preview", - "slug": "openai/gpt-4o-mini-search-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.1-Codex-Max", + "slug": "openai/gpt-5.1-codex-max", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-4o-mini-search-preview-2025-03-11", - "model_variant_slug": "openai/gpt-4o-mini-search-preview", + "model_variant_permaslug": "openai/gpt-5.1-codex-max-20251204", + "model_variant_slug": "openai/gpt-5.1-codex-max", "moderation_required": true, - "name": "OpenAI | openai/gpt-4o-mini-search-preview-2025-03-11", + "name": "OpenAI | openai/gpt-5.1-codex-max-20251204", "pricing": { - "completion": "0.0000006", + "completion": "0.00001", "discount": 0, - "image": "0.000217", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0.0275", - "web_search": "0" + "input_cache_read": "0.000000125", + "prompt": "0.00000125", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "OpenAIResponsesAdapter", "baseUrl": "https://api.openai.com/v1", "byokEnabled": true, "dataPolicy": { @@ -119458,62 +118362,61 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-4o-mini-search-preview-2025-03-11", + "provider_model_id": "gpt-5.1-codex-max", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "web_search_options", + "reasoning", + "include_reasoning", + "seed", "max_tokens", "response_format", - "structured_outputs" + "structured_outputs", + "tool_choice", + "tools" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, - "variable_pricings": [ - { - "request": "0.03", - "threshold": "high", - "type": "search-threshold" - }, - { - "request": "0.0275", - "threshold": "medium", - "type": "search-threshold" - }, - { - "request": "0.025", - "threshold": "low", - "type": "search-threshold" - } - ], + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-4o-mini Search Preview", + "name": "OpenAI: GPT-5.1-Codex-Max", "output_modalities": ["text"], - "permaslug": "openai/gpt-4o-mini-search-preview-2025-03-11", - "reasoning_config": null, + "permaslug": "openai/gpt-5.1-codex-max-20251204", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "GPT-4o-mini Search Preview", - "slug": "openai/gpt-4o-mini-search-preview", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.1-Codex-Max", + "slug": "openai/gpt-5.1-codex-max", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", "context_length": 400000, - "created_at": "2025-08-07T17:23:33+00:00", + "created_at": "2025-11-13T18:17:00.379348+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -119521,7 +118424,7 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, @@ -119535,15 +118438,10 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true }, - "supports_file_urls": true, - "supports_implicit_caching": true, - "supports_input_audio": false, - "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -119553,7 +118451,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "7c2f859a-7890-4e8e-b1de-1cd1c0a800b4", + "id": "27923ab8-2d0e-47ac-b04c-fc79d77ddbd5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -119562,13 +118460,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, - "max_prompt_tokens": 272000, + "max_completion_tokens": 100000, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", "context_length": 400000, - "created_at": "2025-08-07T17:23:33+00:00", + "created_at": "2025-11-13T18:17:00.379348+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -119576,11 +118474,9 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -119592,37 +118488,32 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5", + "name": "OpenAI: GPT-5.1-Codex-Mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-2025-08-07", + "permaslug": "openai/gpt-5.1-codex-mini-20251113", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5", - "slug": "openai/gpt-5", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GPT-5.1-Codex-Mini", + "slug": "openai/gpt-5.1-codex-mini", + "updated_at": "2025-11-13T18:54:22.09584+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5-2025-08-07", - "model_variant_slug": "openai/gpt-5", + "model_variant_permaslug": "openai/gpt-5.1-codex-mini-20251113", + "model_variant_slug": "openai/gpt-5.1-codex-mini", "moderation_required": true, - "name": "OpenAI | openai/gpt-5-2025-08-07", + "name": "OpenAI | openai/gpt-5.1-codex-mini-20251113", "pricing": { - "completion": "0.00001", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0.01" + "input_cache_read": "0.000000025", + "prompt": "0.00000025" }, "provider_display_name": "OpenAI", "provider_info": { @@ -119652,13 +118543,13 @@ "moderationRequired": true, "name": "OpenAI", "owners": ["{}"], - "slug": "openai/default", + "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5-2025-08-07", + "provider_model_id": "gpt-5.1-codex-mini", "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "openai/default", + "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ "reasoning", @@ -119677,9 +118568,7 @@ "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -119691,35 +118580,39 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5", + "name": "OpenAI: GPT-5.1-Codex-Mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-2025-08-07", + "permaslug": "openai/gpt-5.1-codex-mini-20251113", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5", - "slug": "openai/gpt-5", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GPT-5.1-Codex-Mini", + "slug": "openai/gpt-5.1-codex-mini", + "updated_at": "2025-11-13T18:54:22.09584+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2025-08-07T17:30:37.42514+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-12-10T18:02:55.765028+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.", + "description": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\n\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, sciende, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 400000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -119729,10 +118622,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "is_mandatory_reasoning": false, "supports_file_urls": true, "supports_implicit_caching": true, "supports_input_audio": false, @@ -119746,7 +118636,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8f8398f5-523f-4676-8ba4-e82e3484efb1", + "id": "f00142c2-6a93-49ce-9e36-5593b904ce3b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -119755,18 +118645,23 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, - "max_prompt_tokens": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": 272000, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2025-08-07T17:30:37.42514+00:00", - "default_parameters": {}, + "context_length": 400000, + "created_at": "2025-12-10T18:02:55.765028+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.", + "description": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\n\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, sciende, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -119781,33 +118676,29 @@ "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Chat", + "name": "OpenAI: GPT-5.2", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-chat-2025-08-07", + "permaslug": "openai/gpt-5.2-20251211", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Chat", - "slug": "openai/gpt-5-chat", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.2", + "slug": "openai/gpt-5.2", + "updated_at": "2025-12-11T18:20:09.990885+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5-chat-2025-08-07", - "model_variant_slug": "openai/gpt-5-chat", + "model_variant_permaslug": "openai/gpt-5.2-20251211", + "model_variant_slug": "openai/gpt-5.2", "moderation_required": true, - "name": "OpenAI | openai/gpt-5-chat-2025-08-07", + "name": "OpenAI | openai/gpt-5.2-20251211", "pricing": { - "completion": "0.00001", + "completion": "0.000014", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", + "input_cache_read": "0.000000175", + "prompt": "0.00000175", "web_search": "0.01" }, "provider_display_name": "OpenAI", @@ -119841,19 +118732,29 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5-chat-latest", + "provider_model_id": "gpt-5.2-2025-12-11", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", - "supported_parameters": ["structured_outputs", "response_format", "seed", "max_tokens"], + "supported_parameters": [ + "reasoning", + "include_reasoning", + "seed", + "max_tokens", + "response_format", + "structured_outputs", + "tools", + "tool_choice" + ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -119868,24 +118769,24 @@ "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Chat", + "name": "OpenAI: GPT-5.2", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-chat-2025-08-07", + "permaslug": "openai/gpt-5.2-20251211", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Chat", - "slug": "openai/gpt-5-chat", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GPT-5.2", + "slug": "openai/gpt-5.2", + "updated_at": "2025-12-11T18:20:09.990885+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-09-23T16:03:23.098042+00:00", + "context_length": 128000, + "created_at": "2025-12-10T18:03:03.398082+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -119893,11 +118794,11 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", + "description": "GPT-5.2 Chat (AKA Instant) is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.2 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -119907,13 +118808,10 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supports_file_urls": true, "supports_implicit_caching": true, "supports_input_audio": false, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -119923,7 +118821,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f10a63bc-2bcd-4726-9e75-1e482efd080c", + "id": "e510ac71-5b26-4f05-b214-f95ba530d45d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -119932,13 +118830,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, - "max_prompt_tokens": 272000, + "max_completion_tokens": 16384, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-09-23T16:03:23.098042+00:00", + "context_length": 128000, + "created_at": "2025-12-10T18:03:03.398082+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -119946,8 +118844,9 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", + "description": "GPT-5.2 Chat (AKA Instant) is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.2 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -119959,37 +118858,33 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Codex", + "name": "OpenAI: GPT-5.2 Chat", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-codex", + "permaslug": "openai/gpt-5.2-chat-20251211", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Codex", - "slug": "openai/gpt-5-codex", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GPT-5.2 Chat", + "slug": "openai/gpt-5.2-chat", + "updated_at": "2025-12-11T18:20:07.580251+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5-codex", - "model_variant_slug": "openai/gpt-5-codex", + "model_variant_permaslug": "openai/gpt-5.2-chat-20251211", + "model_variant_slug": "openai/gpt-5.2-chat", "moderation_required": true, - "name": "OpenAI | openai/gpt-5-codex", + "name": "OpenAI | openai/gpt-5.2-chat-20251211", "pricing": { - "completion": "0.00001", + "completion": "0.000014", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000175", + "prompt": "0.00000175", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { @@ -120022,28 +118917,27 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5-codex", + "provider_model_id": "gpt-5.2-chat-latest", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "seed", "max_tokens", + "response_format", + "structured_outputs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -120055,27 +118949,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Codex", + "name": "OpenAI: GPT-5.2 Chat", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-codex", + "permaslug": "openai/gpt-5.2-chat-20251211", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Codex", - "slug": "openai/gpt-5-codex", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GPT-5.2 Chat", + "slug": "openai/gpt-5.2-chat", + "updated_at": "2025-12-11T18:20:07.580251+00:00", "warning_message": null }, { "author": "openai", "context_length": 400000, - "created_at": "2025-10-14T13:19:46.029021+00:00", + "created_at": "2025-12-10T18:03:00.055991+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -120083,7 +118977,7 @@ }, "default_stops": [], "default_system": null, - "description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's GPT-5 model with state-of-the-art image generation capabilities. It offers major improvements in reasoning, code quality, and user experience while incorporating GPT Image 1's superior instruction following, text rendering, and detailed image editing.", + "description": "GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, @@ -120098,12 +118992,7 @@ }, "features": { "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_file_urls": true, - "supports_implicit_caching": true, "supports_input_audio": false, "supports_native_web_search": true, "supports_tool_choice": { @@ -120115,7 +119004,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "be0ed145-8bfc-4aec-a62d-685ed334fe17", + "id": "1abae580-6f7a-4092-ae6c-b87ce6067f61", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -120130,7 +119019,7 @@ "model": { "author": "openai", "context_length": 400000, - "created_at": "2025-10-14T13:19:46.029021+00:00", + "created_at": "2025-12-10T18:03:00.055991+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -120138,7 +119027,7 @@ }, "default_stops": [], "default_system": null, - "description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's GPT-5 model with state-of-the-art image generation capabilities. It offers major improvements in reasoning, code quality, and user experience while incorporating GPT Image 1's superior instruction following, text rendering, and detailed image editing.", + "description": "GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", "features": { "chat_template_config": { "should_hoist_and_merge_system_messages": null @@ -120157,33 +119046,28 @@ "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Image", - "output_modalities": ["image", "text"], - "permaslug": "openai/gpt-5-image", + "name": "OpenAI: GPT-5.2 Pro", + "output_modalities": ["text"], + "permaslug": "openai/gpt-5.2-pro-20251211", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Image", - "slug": "openai/gpt-5-image", + "short_name": "GPT-5.2 Pro", + "slug": "openai/gpt-5.2-pro", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5-image", - "model_variant_slug": "openai/gpt-5-image", + "model_variant_permaslug": "openai/gpt-5.2-pro-20251211", + "model_variant_slug": "openai/gpt-5.2-pro", "moderation_required": true, - "name": "OpenAI | openai/gpt-5-image", + "name": "OpenAI | openai/gpt-5.2-pro-20251211", "pricing": { - "completion": "0.00001", + "completion": "0.000168", "discount": 0, - "image": "0.00001", - "image_output": "0.00004", - "input_cache_read": "0.00000125", - "internal_reasoning": "0", - "prompt": "0.00001", - "request": "0", + "prompt": "0.000021", "web_search": "0.01" }, "provider_display_name": "OpenAI", @@ -120217,7 +119101,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5-2025-08-07", + "provider_model_id": "gpt-5.2-pro-2025-12-11", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -120225,18 +119109,10 @@ "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "seed", "max_tokens", - "temperature", - "top_p", - "stop", - "frequency_penalty", - "presence_penalty", - "logit_bias", - "logprobs", - "top_logprobs", + "response_format", + "structured_outputs", "tools", "tool_choice" ], @@ -120264,24 +119140,24 @@ "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Image", - "output_modalities": ["image", "text"], - "permaslug": "openai/gpt-5-image", + "name": "OpenAI: GPT-5.2 Pro", + "output_modalities": ["text"], + "permaslug": "openai/gpt-5.2-pro-20251211", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Image", - "slug": "openai/gpt-5-image", + "short_name": "GPT-5.2 Pro", + "slug": "openai/gpt-5.2-pro", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", "context_length": 400000, - "created_at": "2025-10-16T14:23:03.143259+00:00", + "created_at": "2026-01-14T16:48:35.067026+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -120289,7 +119165,7 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation. This natively multimodal model features superior instruction following, text rendering, and detailed image editing with reduced latency and cost. It excels at high-quality visual creation while maintaining strong text understanding, making it ideal for applications that require both efficient image generation and text processing at scale.", + "description": "GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1-Codex, 5.2-Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, @@ -120303,15 +119179,8 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_file_urls": true, "supports_implicit_caching": true, - "supports_input_audio": false, - "supports_multipart": true, "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, @@ -120322,7 +119191,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "7c09094a-64ec-4d53-bd69-c165ac31c465", + "id": "dff70494-a1eb-4be2-b61f-9b2277db56c9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -120337,7 +119206,7 @@ "model": { "author": "openai", "context_length": 400000, - "created_at": "2025-10-16T14:23:03.143259+00:00", + "created_at": "2026-01-14T16:48:35.067026+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -120345,49 +119214,38 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation. This natively multimodal model features superior instruction following, text rendering, and detailed image editing with reduced latency and cost. It excels at high-quality visual creation while maintaining strong text understanding, making it ideal for applications that require both efficient image generation and text processing at scale.", + "description": "GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1-Codex, 5.2-Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } + "chat_template_config": {}, + "reasoning_config": {} }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Image Mini", - "output_modalities": ["image", "text"], - "permaslug": "openai/gpt-5-image-mini", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "name": "OpenAI: GPT-5.2-Codex", + "output_modalities": ["text"], + "permaslug": "openai/gpt-5.2-codex-20260114", + "reasoning_config": {}, "router": null, - "short_name": "GPT-5 Image Mini", - "slug": "openai/gpt-5-image-mini", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GPT-5.2-Codex", + "slug": "openai/gpt-5.2-codex", + "updated_at": "2026-01-14T18:06:58.161562+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5-image-mini", - "model_variant_slug": "openai/gpt-5-image-mini", + "model_variant_permaslug": "openai/gpt-5.2-codex-20260114", + "model_variant_slug": "openai/gpt-5.2-codex", "moderation_required": true, - "name": "OpenAI | openai/gpt-5-image-mini", + "name": "OpenAI | openai/gpt-5.2-codex-20260114", "pricing": { - "completion": "0.000002", + "completion": "0.000014", "discount": 0, - "image": "0.0000025", - "image_output": "0.000008", - "input_cache_read": "0.00000025", - "internal_reasoning": "0", - "prompt": "0.0000025", - "request": "0", + "input_cache_read": "0.000000175", + "prompt": "0.00000175", "web_search": "0.01" }, "provider_display_name": "OpenAI", @@ -120421,7 +119279,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5-mini-2025-08-07", + "provider_model_id": "gpt-5.2-codex", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -120429,12 +119287,10 @@ "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "seed", "max_tokens", - "temperature", - "top_p", + "response_format", + "structured_outputs", "stop", "frequency_penalty", "presence_penalty", @@ -120451,46 +119307,39 @@ "variant": "standard" }, "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } + "chat_template_config": {}, + "reasoning_config": {} }, "group": "GPT", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Image Mini", - "output_modalities": ["image", "text"], - "permaslug": "openai/gpt-5-image-mini", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "name": "OpenAI: GPT-5.2-Codex", + "output_modalities": ["text"], + "permaslug": "openai/gpt-5.2-codex-20260114", + "reasoning_config": {}, "router": null, - "short_name": "GPT-5 Image Mini", - "slug": "openai/gpt-5-image-mini", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GPT-5.2-Codex", + "slug": "openai/gpt-5.2-codex", + "updated_at": "2026-01-14T18:06:58.161562+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-08-07T17:23:27+00:00", + "context_length": 200000, + "created_at": "2024-12-17T18:26:39.576639+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.", + "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding. The o1 model series is trained with large-scale reinforcement learning to reason using chain of thought. \n\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology. Learn more in the [launch announcement](https://openai.com/o1).\n", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -120500,15 +119349,7 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_file_urls": true, - "supports_implicit_caching": true, - "supports_input_audio": false, - "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -120518,7 +119359,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c4f66d01-20b0-4c27-a225-438ea22fda43", + "id": "82738f61-f3cb-44a5-b5d1-e6787ae64e3b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -120527,17 +119368,17 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, - "max_prompt_tokens": 272000, + "max_completion_tokens": 100000, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-08-07T17:23:27+00:00", + "context_length": 200000, + "created_at": "2024-12-17T18:26:39.576639+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost. GPT-5 Mini is the successor to OpenAI's o4-mini model.", + "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding. The o1 model series is trained with large-scale reinforcement learning to reason using chain of thought. \n\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology. Learn more in the [launch announcement](https://openai.com/o1).\n", "features": { "reasoning_config": { "end_token": null, @@ -120553,34 +119394,29 @@ "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Mini", + "name": "OpenAI: o1", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-mini-2025-08-07", + "permaslug": "openai/o1-2024-12-17", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Mini", - "slug": "openai/gpt-5-mini", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o1", + "slug": "openai/o1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5-mini-2025-08-07", - "model_variant_slug": "openai/gpt-5-mini", + "model_variant_permaslug": "openai/o1-2024-12-17", + "model_variant_slug": "openai/o1", "moderation_required": true, - "name": "OpenAI | openai/gpt-5-mini-2025-08-07", + "name": "OpenAI | openai/o1-2024-12-17", "pricing": { - "completion": "0.000002", + "completion": "0.00006", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000025", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0.01" + "input_cache_read": "0.0000075", + "prompt": "0.000015" }, "provider_display_name": "OpenAI", "provider_info": { @@ -120613,23 +119449,21 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5-mini-2025-08-07", + "provider_model_id": "o1-2024-12-17", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "seed", "max_tokens", + "response_format", + "structured_outputs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -120649,32 +119483,32 @@ "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Mini", + "name": "OpenAI: o1", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-mini-2025-08-07", + "permaslug": "openai/o1-2024-12-17", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Mini", - "slug": "openai/gpt-5-mini", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o1", + "slug": "openai/o1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-08-07T17:23:22+00:00", + "context_length": 200000, + "created_at": "2025-03-19T22:26:51.610039+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.", + "description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o1-pro model uses more compute to think harder and provide consistently better answers.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -120684,15 +119518,7 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_file_urls": true, - "supports_implicit_caching": true, - "supports_input_audio": false, - "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -120702,7 +119528,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "50329d77-04e1-4979-a184-c33030289476", + "id": "046ae30d-fe99-44b4-b020-21127e4342c7", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -120711,17 +119537,17 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, - "max_prompt_tokens": 272000, + "max_completion_tokens": 100000, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-08-07T17:23:22+00:00", + "context_length": 200000, + "created_at": "2025-03-19T22:26:51.610039+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger counterparts, it retains key instruction-following and safety features. It is the successor to GPT-4.1-nano and offers a lightweight option for cost-sensitive or real-time applications.", + "description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o1-pro model uses more compute to think harder and provide consistently better answers.", "features": { "reasoning_config": { "end_token": null, @@ -120737,34 +119563,28 @@ "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Nano", + "name": "OpenAI: o1-pro", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-nano-2025-08-07", + "permaslug": "openai/o1-pro", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Nano", - "slug": "openai/gpt-5-nano", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o1-pro", + "slug": "openai/o1-pro", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5-nano-2025-08-07", - "model_variant_slug": "openai/gpt-5-nano", + "model_variant_permaslug": "openai/o1-pro", + "model_variant_slug": "openai/o1-pro", "moderation_required": true, - "name": "OpenAI | openai/gpt-5-nano-2025-08-07", + "name": "OpenAI | openai/o1-pro", "pricing": { - "completion": "0.0000004", + "completion": "0.0006", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000005", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0.01" + "prompt": "0.00015" }, "provider_display_name": "OpenAI", "provider_info": { @@ -120797,7 +119617,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5-nano-2025-08-07", + "provider_model_id": "o1-pro", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -120805,16 +119625,14 @@ "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "seed", "max_tokens", - "tools", - "tool_choice" + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -120833,36 +119651,32 @@ "input_modalities": ["text", "image", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Nano", + "name": "OpenAI: o1-pro", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-nano-2025-08-07", + "permaslug": "openai/o1-pro", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Nano", - "slug": "openai/gpt-5-nano", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o1-pro", + "slug": "openai/o1-pro", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-10-06T18:51:03.215373+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "context_length": 200000, + "created_at": "2025-04-16T17:10:57.049467+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images. ", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -120872,13 +119686,11 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true }, "supports_file_urls": true, - "supports_input_audio": false, "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, @@ -120889,7 +119701,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "86e3e664-d291-415d-a769-8e08b96a79e9", + "id": "42e72619-d01c-411c-a201-f991644768b7", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -120898,25 +119710,18 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, - "max_prompt_tokens": 272000, + "max_completion_tokens": 100000, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-10-06T18:51:03.215373+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "context_length": 200000, + "created_at": "2025-04-16T17:10:57.049467+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images. ", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, @@ -120931,32 +119736,29 @@ "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Pro", + "name": "OpenAI: o3", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-pro-2025-10-06", + "permaslug": "openai/o3-2025-04-16", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Pro", - "slug": "openai/gpt-5-pro", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o3", + "slug": "openai/o3", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5-pro-2025-10-06", - "model_variant_slug": "openai/gpt-5-pro", + "model_variant_permaslug": "openai/o3-2025-04-16", + "model_variant_slug": "openai/o3", "moderation_required": true, - "name": "OpenAI | openai/gpt-5-pro-2025-10-06", + "name": "OpenAI | openai/o3-2025-04-16", "pricing": { - "completion": "0.00012", + "completion": "0.000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000015", - "request": "0", + "input_cache_read": "0.0000005", + "prompt": "0.000002", "web_search": "0.01" }, "provider_display_name": "OpenAI", @@ -120990,7 +119792,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5-pro-2025-10-06", + "provider_model_id": "o3-2025-04-16", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -121012,9 +119814,6 @@ "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, @@ -121029,24 +119828,24 @@ "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5 Pro", + "name": "OpenAI: o3", "output_modalities": ["text"], - "permaslug": "openai/gpt-5-pro-2025-10-06", + "permaslug": "openai/o3-2025-04-16", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5 Pro", - "slug": "openai/gpt-5-pro", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o3", + "slug": "openai/o3", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-11-13T18:58:25+00:00", + "context_length": 200000, + "created_at": "2025-10-10T20:54:21.971349+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -121054,11 +119853,11 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\n\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5", + "description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -121068,13 +119867,11 @@ "training": false }, "features": { - "is_mandatory_reasoning": false, "supported_parameters": { "response_format": true, "structured_outputs": true }, "supports_file_urls": true, - "supports_implicit_caching": true, "supports_input_audio": false, "supports_native_web_search": true, "supports_tool_choice": { @@ -121086,7 +119883,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "764eb97f-8bab-4326-b29b-7a8799b00a70", + "id": "a697c6df-cbb4-4a6b-8b1e-04e92f00146a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -121095,13 +119892,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, - "max_prompt_tokens": 272000, + "max_completion_tokens": 100000, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-11-13T18:58:25+00:00", + "context_length": 200000, + "created_at": "2025-10-10T20:54:21.971349+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -121109,9 +119906,8 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks. The model produces clearer, more grounded explanations with reduced jargon, making it easier to follow even on technical or multi-step problems.\n\nBuilt for broad task coverage, GPT-5.1 delivers consistent gains across math, coding, and structured analysis workloads, with more coherent long-form answers and improved tool-use reliability. It also features refined conversational alignment, enabling warmer, more intuitive responses without compromising precision. GPT-5.1 serves as the primary full-capability successor to GPT-5", + "description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -121126,33 +119922,29 @@ "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1", + "name": "OpenAI: o3 Deep Research", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-20251113", + "permaslug": "openai/o3-deep-research-2025-06-26", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1", - "slug": "openai/gpt-5.1", - "updated_at": "2025-11-13T18:58:25.56227+00:00", + "short_name": "o3 Deep Research", + "slug": "openai/o3-deep-research", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5.1-20251113", - "model_variant_slug": "openai/gpt-5.1", + "model_variant_permaslug": "openai/o3-deep-research-2025-06-26", + "model_variant_slug": "openai/o3-deep-research", "moderation_required": true, - "name": "OpenAI | openai/gpt-5.1-20251113", + "name": "OpenAI | openai/o3-deep-research-2025-06-26", "pricing": { - "completion": "0.00001", + "completion": "0.00004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", + "input_cache_read": "0.0000025", + "prompt": "0.00001", "web_search": "0.01" }, "provider_display_name": "OpenAI", @@ -121183,13 +119975,13 @@ "moderationRequired": true, "name": "OpenAI", "owners": ["{}"], - "slug": "openai/default", + "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5.1-2025-11-13", + "provider_model_id": "o3-deep-research-2025-06-26", "provider_name": "OpenAI", "provider_region": null, - "provider_slug": "openai/default", + "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ "reasoning", @@ -121198,6 +119990,14 @@ "response_format", "seed", "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "logit_bias", + "logprobs", + "top_logprobs", "tools", "tool_choice" ], @@ -121208,7 +120008,6 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -121223,36 +120022,32 @@ "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1", + "name": "OpenAI: o3 Deep Research", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-20251113", + "permaslug": "openai/o3-deep-research-2025-06-26", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1", - "slug": "openai/gpt-5.1", - "updated_at": "2025-11-13T18:58:25.56227+00:00", + "short_name": "o3 Deep Research", + "slug": "openai/o3-deep-research", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2025-11-13T18:58:22+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "context_length": 200000, + "created_at": "2025-01-31T19:28:41.132151+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.1 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.\n", + "description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.\n\nThis model supports the `reasoning_effort` parameter, which can be set to \"high\", \"medium\", or \"low\" to control the thinking time of the model. The default is \"medium\". OpenRouter also offers the model slug `openai/o3-mini-high` to default the parameter to \"high\".\n\nThe model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -121262,14 +120057,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_file_urls": true, - "supports_implicit_caching": true, - "supports_input_audio": false, - "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -121279,7 +120067,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f27c561c-0804-4e51-a96e-18bc1968212d", + "id": "e93c942e-7f8f-410d-8478-21ec37bc6b0d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -121288,23 +120076,18 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 100000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2025-11-13T18:58:22+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "context_length": 200000, + "created_at": "2025-01-31T19:28:41.132151+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.1 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.\n", + "description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.\n\nThis model supports the `reasoning_effort` parameter, which can be set to \"high\", \"medium\", or \"low\" to control the thinking time of the model. The default is \"medium\". OpenRouter also offers the model slug `openai/o3-mini-high` to default the parameter to \"high\".\n\nThe model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -121316,37 +120099,32 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1 Chat", + "name": "OpenAI: o3 Mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-chat-20251113", + "permaslug": "openai/o3-mini-2025-01-31", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1 Chat", - "slug": "openai/gpt-5.1-chat", - "updated_at": "2025-11-13T18:58:22.624591+00:00", + "short_name": "o3 Mini", + "slug": "openai/o3-mini", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5.1-chat-20251113", - "model_variant_slug": "openai/gpt-5.1-chat", + "model_variant_permaslug": "openai/o3-mini-2025-01-31", + "model_variant_slug": "openai/o3-mini", "moderation_required": true, - "name": "OpenAI | openai/gpt-5.1-chat-20251113", + "name": "OpenAI | openai/o3-mini-2025-01-31", "pricing": { - "completion": "0.00001", + "completion": "0.0000044", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0.01" + "input_cache_read": "0.00000055", + "prompt": "0.0000011" }, "provider_display_name": "OpenAI", "provider_info": { @@ -121379,18 +120157,18 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5.1-chat-latest", + "provider_model_id": "o3-mini-2025-01-31", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", "seed", "max_tokens", - "tool_choice", - "tools" + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, @@ -121399,7 +120177,6 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -121411,27 +120188,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1 Chat", + "name": "OpenAI: o3 Mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-chat-20251113", + "permaslug": "openai/o3-mini-2025-01-31", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1 Chat", - "slug": "openai/gpt-5.1-chat", - "updated_at": "2025-11-13T18:58:22.624591+00:00", + "short_name": "o3 Mini", + "slug": "openai/o3-mini", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-11-13T18:58:18+00:00", + "context_length": 200000, + "created_at": "2025-02-12T15:03:31.504126+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -121439,11 +120216,11 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", + "description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. \n\no3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -121453,13 +120230,7 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_implicit_caching": true, - "supports_input_audio": false, + "supports_file_urls": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -121469,7 +120240,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "58caabab-f2a1-4a27-b098-b46b924efd27", + "id": "4a8663f2-89f3-40e9-9b50-e4838fff0155", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -121478,13 +120249,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, - "max_prompt_tokens": 272000, + "max_completion_tokens": 100000, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-11-13T18:58:18+00:00", + "context_length": 200000, + "created_at": "2025-02-12T15:03:31.504126+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -121492,9 +120263,11 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.", + "description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. \n\no3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, @@ -121506,37 +120279,32 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1-Codex", + "name": "OpenAI: o3 Mini High", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-codex-20251113", + "permaslug": "openai/o3-mini-high-2025-01-31", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1-Codex", - "slug": "openai/gpt-5.1-codex", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o3 Mini High", + "slug": "openai/o3-mini-high", + "updated_at": "2026-01-08T00:52:39.57688+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5.1-codex-20251113", - "model_variant_slug": "openai/gpt-5.1-codex", + "model_variant_permaslug": "openai/o3-mini-high-2025-01-31", + "model_variant_slug": "openai/o3-mini-high", "moderation_required": true, - "name": "OpenAI | openai/gpt-5.1-codex-20251113", + "name": "OpenAI | openai/o3-mini-high-2025-01-31", "pricing": { - "completion": "0.00001", + "completion": "0.0000044", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000055", + "prompt": "0.0000011" }, "provider_display_name": "OpenAI", "provider_info": { @@ -121569,29 +120337,29 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5.1-codex", + "provider_model_id": "o3-mini-2025-01-31", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "seed", "max_tokens", + "response_format", + "structured_outputs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, "reasoning_config": { "end_token": null, "start_token": null, @@ -121603,27 +120371,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1-Codex", + "name": "OpenAI: o3 Mini High", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-codex-20251113", + "permaslug": "openai/o3-mini-high-2025-01-31", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1-Codex", - "slug": "openai/gpt-5.1-codex", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o3 Mini High", + "slug": "openai/o3-mini-high", + "updated_at": "2026-01-08T00:52:39.57688+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-12-04T20:08:54.10013+00:00", + "context_length": 200000, + "created_at": "2025-06-10T23:32:32.266087+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -121631,11 +120399,11 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.1-Codex-Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research. \nGPT-5.1-Codex-Max delivers faster performance, improved reasoning, and higher token efficiency across the development lifecycle. ", + "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.\n\nNote that BYOK is required for this model. Set up here: https://openrouter.ai/settings/integrations", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -121645,9 +120413,11 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supports_implicit_caching": true, - "supports_input_audio": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_file_urls": true, "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, @@ -121658,7 +120428,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f225ad30-4cb3-4e28-b677-0eff326af277", + "id": "b8222376-66ee-4b89-a7c9-e627ba35db79", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -121667,13 +120437,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, + "max_completion_tokens": 100000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-12-04T20:08:54.10013+00:00", + "context_length": 200000, + "created_at": "2025-06-10T23:32:32.266087+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -121681,7 +120451,7 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.1-Codex-Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic workflows spanning software engineering, mathematics, and research. \nGPT-5.1-Codex-Max delivers faster performance, improved reasoning, and higher token efficiency across the development lifecycle. ", + "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.\n\nNote that BYOK is required for this model. Set up here: https://openrouter.ai/settings/integrations", "features": { "chat_template_config": {}, "reasoning_config": { @@ -121695,37 +120465,32 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "file", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1-Codex-Max", + "name": "OpenAI: o3 Pro", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-codex-max-20251204", + "permaslug": "openai/o3-pro-2025-06-10", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1-Codex-Max", - "slug": "openai/gpt-5.1-codex-max", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o3 Pro", + "slug": "openai/o3-pro", + "updated_at": "2026-01-14T02:46:50.629344+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5.1-codex-max-20251204", - "model_variant_slug": "openai/gpt-5.1-codex-max", + "model_variant_permaslug": "openai/o3-pro-2025-06-10", + "model_variant_slug": "openai/o3-pro", "moderation_required": true, - "name": "OpenAI | openai/gpt-5.1-codex-max-20251204", + "name": "OpenAI | openai/o3-pro-2025-06-10", "pricing": { - "completion": "0.00001", + "completion": "0.00008", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000125", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.00002", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { @@ -121758,7 +120523,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5.1-codex-max", + "provider_model_id": "o3-pro-2025-06-10", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -121766,12 +120531,12 @@ "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", - "tool_choice", - "tools" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, @@ -121792,39 +120557,35 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text", "file", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1-Codex-Max", + "name": "OpenAI: o3 Pro", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-codex-max-20251204", + "permaslug": "openai/o3-pro-2025-06-10", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1-Codex-Max", - "slug": "openai/gpt-5.1-codex-max", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "o3 Pro", + "slug": "openai/o3-pro", + "updated_at": "2026-01-14T02:46:50.629344+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-11-13T18:17:00.379348+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "context_length": 200000, + "created_at": "2025-04-16T16:29:02.980764+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex", + "description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -121838,6 +120599,8 @@ "response_format": true, "structured_outputs": true }, + "supports_file_urls": true, + "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -121847,7 +120610,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "27923ab8-2d0e-47ac-b04c-fc79d77ddbd5", + "id": "bd121898-b27c-4e2c-bc92-278627465a54", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -121861,18 +120624,13 @@ "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-11-13T18:17:00.379348+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "context_length": 200000, + "created_at": "2025-04-16T16:29:02.980764+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex", + "description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -121884,37 +120642,33 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1-Codex-Mini", + "name": "OpenAI: o4 Mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-codex-mini-20251113", + "permaslug": "openai/o4-mini-2025-04-16", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1-Codex-Mini", - "slug": "openai/gpt-5.1-codex-mini", - "updated_at": "2025-11-13T18:54:22.09584+00:00", + "short_name": "o4 Mini", + "slug": "openai/o4-mini", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5.1-codex-mini-20251113", - "model_variant_slug": "openai/gpt-5.1-codex-mini", + "model_variant_permaslug": "openai/o4-mini-2025-04-16", + "model_variant_slug": "openai/o4-mini", "moderation_required": true, - "name": "OpenAI | openai/gpt-5.1-codex-mini-20251113", + "name": "OpenAI | openai/o4-mini-2025-04-16", "pricing": { - "completion": "0.000002", + "completion": "0.0000044", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000025", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000275", + "prompt": "0.0000011", + "web_search": "0.01" }, "provider_display_name": "OpenAI", "provider_info": { @@ -121947,7 +120701,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5.1-codex-mini", + "provider_model_id": "o4-mini-2025-04-16", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -121969,7 +120723,6 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -121981,27 +120734,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.1-Codex-Mini", + "name": "OpenAI: o4 Mini", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.1-codex-mini-20251113", + "permaslug": "openai/o4-mini-2025-04-16", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.1-Codex-Mini", - "slug": "openai/gpt-5.1-codex-mini", - "updated_at": "2025-11-13T18:54:22.09584+00:00", + "short_name": "o4 Mini", + "slug": "openai/o4-mini", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-12-10T18:02:55.765028+00:00", + "context_length": 200000, + "created_at": "2025-10-10T20:54:02.725273+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -122009,11 +120762,11 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\n\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, sciende, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.", + "description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -122023,9 +120776,11 @@ "training": false }, "features": { - "is_mandatory_reasoning": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_file_urls": true, - "supports_implicit_caching": true, "supports_input_audio": false, "supports_native_web_search": true, "supports_tool_choice": { @@ -122037,7 +120792,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f00142c2-6a93-49ce-9e36-5593b904ce3b", + "id": "97f9f7c3-9e55-4f6d-8de4-459d4f23e2a9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -122046,13 +120801,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, - "max_prompt_tokens": 272000, + "max_completion_tokens": 100000, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-12-10T18:02:55.765028+00:00", + "context_length": 200000, + "created_at": "2025-10-10T20:54:02.725273+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -122060,9 +120815,8 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly to simple queries while spending more depth on complex tasks.\n\nBuilt for broad task coverage, GPT-5.2 delivers consistent gains across math, coding, sciende, and tool calling workloads, with more coherent long-form answers and improved tool-use reliability.", + "description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -122077,33 +120831,29 @@ "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.2", + "name": "OpenAI: o4 Mini Deep Research", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.2-20251211", + "permaslug": "openai/o4-mini-deep-research-2025-06-26", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.2", - "slug": "openai/gpt-5.2", - "updated_at": "2025-12-11T18:20:09.990885+00:00", + "short_name": "o4 Mini Deep Research", + "slug": "openai/o4-mini-deep-research", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5.2-20251211", - "model_variant_slug": "openai/gpt-5.2", + "model_variant_permaslug": "openai/o4-mini-deep-research-2025-06-26", + "model_variant_slug": "openai/o4-mini-deep-research", "moderation_required": true, - "name": "OpenAI | openai/gpt-5.2-20251211", + "name": "OpenAI | openai/o4-mini-deep-research-2025-06-26", "pricing": { - "completion": "0.000014", + "completion": "0.000008", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000175", - "internal_reasoning": "0", - "prompt": "0.00000175", - "request": "0", + "input_cache_read": "0.0000005", + "prompt": "0.000002", "web_search": "0.01" }, "provider_display_name": "OpenAI", @@ -122137,7 +120887,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5.2-2025-12-11", + "provider_model_id": "o4-mini-deep-research-2025-06-26", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -122145,10 +120895,18 @@ "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "logit_bias", + "logprobs", + "top_logprobs", "tools", "tool_choice" ], @@ -122159,7 +120917,6 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -122174,24 +120931,24 @@ "input_modalities": ["file", "image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.2", + "name": "OpenAI: o4 Mini Deep Research", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.2-20251211", + "permaslug": "openai/o4-mini-deep-research-2025-06-26", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.2", - "slug": "openai/gpt-5.2", - "updated_at": "2025-12-11T18:20:09.990885+00:00", + "short_name": "o4 Mini Deep Research", + "slug": "openai/o4-mini-deep-research", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 128000, - "created_at": "2025-12-10T18:03:03.398082+00:00", + "context_length": 200000, + "created_at": "2025-04-16T17:23:32.042157+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -122199,11 +120956,11 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.2 Chat (AKA Instant) is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.2 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.", + "description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. \n\nOpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -122213,9 +120970,11 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_file_urls": true, - "supports_implicit_caching": true, - "supports_input_audio": false, "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, @@ -122226,7 +120985,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "e510ac71-5b26-4f05-b214-f95ba530d45d", + "id": "60020533-2fb2-4aa1-9454-181029fd52de", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -122235,13 +120994,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 100000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 128000, - "created_at": "2025-12-10T18:03:03.398082+00:00", + "context_length": 200000, + "created_at": "2025-04-16T17:23:32.042157+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -122249,7 +121008,7 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.2 Chat (AKA Instant) is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on harder queries, improving accuracy on math, coding, and multi-step tasks without slowing down typical conversations. The model is warmer and more conversational by default, with better instruction following and more stable short-form reasoning. GPT-5.2 Chat is designed for high-throughput, interactive workloads where responsiveness and consistency matter more than deep deliberation.", + "description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. \n\nOpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -122263,36 +121022,32 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.2 Chat", + "name": "OpenAI: o4 Mini High", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.2-chat-20251211", + "permaslug": "openai/o4-mini-high-2025-04-16", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.2 Chat", - "slug": "openai/gpt-5.2-chat", - "updated_at": "2025-12-11T18:20:07.580251+00:00", + "short_name": "o4 Mini High", + "slug": "openai/o4-mini-high", + "updated_at": "2026-01-08T00:53:50.4178+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5.2-chat-20251211", - "model_variant_slug": "openai/gpt-5.2-chat", + "model_variant_permaslug": "openai/o4-mini-high-2025-04-16", + "model_variant_slug": "openai/o4-mini-high", "moderation_required": true, - "name": "OpenAI | openai/gpt-5.2-chat-20251211", + "name": "OpenAI | openai/o4-mini-high-2025-04-16", "pricing": { - "completion": "0.000014", + "completion": "0.0000044", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.000000175", - "internal_reasoning": "0", - "prompt": "0.00000175", - "request": "0", + "input_cache_read": "0.000000275", + "prompt": "0.0000011", "web_search": "0.01" }, "provider_display_name": "OpenAI", @@ -122326,21 +121081,23 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5.2-chat-latest", + "provider_model_id": "o4-mini-2025-04-16", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "seed", "max_tokens", - "response_format", - "structured_outputs", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -122358,27 +121115,27 @@ "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["image", "text", "file"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.2 Chat", + "name": "OpenAI: o4 Mini High", "output_modalities": ["text"], - "permaslug": "openai/gpt-5.2-chat-20251211", + "permaslug": "openai/o4-mini-high-2025-04-16", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.2 Chat", - "slug": "openai/gpt-5.2-chat", - "updated_at": "2025-12-11T18:20:07.580251+00:00", + "short_name": "o4 Mini High", + "slug": "openai/o4-mini-high", + "updated_at": "2026-01-08T00:53:50.4178+00:00", "warning_message": null }, { "author": "openai", - "context_length": 400000, - "created_at": "2025-12-10T18:03:00.055991+00:00", + "context_length": 8192, + "created_at": "2025-10-30T22:21:06.503374+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -122386,11 +121143,11 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "description": "text-embedding-3-large is OpenAI's most capable embedding model for both english and non-english tasks. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 400000, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -122400,10 +121157,7 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supports_file_urls": true, "supports_input_audio": false, - "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -122413,7 +121167,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "1abae580-6f7a-4092-ae6c-b87ce6067f61", + "id": "8083d8ef-5e78-4124-8536-f65ba99e2a8a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -122422,13 +121176,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, - "max_prompt_tokens": 272000, + "max_completion_tokens": null, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 400000, - "created_at": "2025-12-10T18:03:00.055991+00:00", + "context_length": 8192, + "created_at": "2025-10-30T22:21:06.503374+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -122436,52 +121190,45 @@ }, "default_stops": [], "default_system": null, - "description": "GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "description": "text-embedding-3-large is OpenAI's most capable embedding model for both english and non-english tasks. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", - "has_text_output": true, + "group": "Other", + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.2 Pro", - "output_modalities": ["text"], - "permaslug": "openai/gpt-5.2-pro-20251211", + "name": "OpenAI: Text Embedding 3 Large", + "output_modalities": ["embeddings"], + "permaslug": "openai/text-embedding-3-large", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.2 Pro", - "slug": "openai/gpt-5.2-pro", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Text Embedding 3 Large", + "slug": "openai/text-embedding-3-large", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-5.2-pro-20251211", - "model_variant_slug": "openai/gpt-5.2-pro", + "model_variant_permaslug": "openai/text-embedding-3-large", + "model_variant_slug": "openai/text-embedding-3-large", "moderation_required": true, - "name": "OpenAI | openai/gpt-5.2-pro-20251211", + "name": "OpenAI | openai/text-embedding-3-large", "pricing": { - "completion": "0.000168", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000021", - "request": "0", - "web_search": "0.01" + "prompt": "0.00000013" }, "provider_display_name": "OpenAI", "provider_info": { @@ -122514,71 +121261,77 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "gpt-5.2-pro-2025-12-11", + "provider_model_id": "text-embedding-3-large", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "seed", "max_tokens", "response_format", "structured_outputs", - "tools", - "tool_choice" + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "logit_bias", + "logprobs", + "top_logprobs" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", - "has_text_output": true, + "group": "Other", + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: GPT-5.2 Pro", - "output_modalities": ["text"], - "permaslug": "openai/gpt-5.2-pro-20251211", + "name": "OpenAI: Text Embedding 3 Large", + "output_modalities": ["embeddings"], + "permaslug": "openai/text-embedding-3-large", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "GPT-5.2 Pro", - "slug": "openai/gpt-5.2-pro", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Text Embedding 3 Large", + "slug": "openai/text-embedding-3-large", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 200000, - "created_at": "2024-12-17T18:26:39.576639+00:00", - "default_parameters": {}, + "context_length": 8192, + "created_at": "2025-10-30T20:50:55+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding. The o1 model series is trained with large-scale reinforcement learning to reason using chain of thought. \n\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology. Learn more in the [launch announcement](https://openai.com/o1).\n", + "description": " text-embedding-3-small is OpenAI's improved, more performant version of the ada embedding model. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -122588,7 +121341,7 @@ "training": false }, "features": { - "supports_file_urls": true, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -122598,7 +121351,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "82738f61-f3cb-44a5-b5d1-e6787ae64e3b", + "id": "d88ee4ad-6cb6-4b9e-b84c-5ca8a4c58e58", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -122607,60 +121360,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 200000, - "created_at": "2024-12-17T18:26:39.576639+00:00", - "default_parameters": {}, + "context_length": 8192, + "created_at": "2025-10-30T20:50:55+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding. The o1 model series is trained with large-scale reinforcement learning to reason using chain of thought. \n\nThe o1 models are optimized for math, science, programming, and other STEM-related tasks. They consistently exhibit PhD-level accuracy on benchmarks in physics, chemistry, and biology. Learn more in the [launch announcement](https://openai.com/o1).\n", + "description": " text-embedding-3-small is OpenAI's improved, more performant version of the ada embedding model. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", - "has_text_output": true, + "group": "Other", + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o1", - "output_modalities": ["text"], - "permaslug": "openai/o1-2024-12-17", + "name": "OpenAI: Text Embedding 3 Small", + "output_modalities": ["embeddings"], + "permaslug": "openai/text-embedding-3-small", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o1", - "slug": "openai/o1", + "short_name": "Text Embedding 3 Small", + "slug": "openai/text-embedding-3-small", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/o1-2024-12-17", - "model_variant_slug": "openai/o1", + "model_variant_permaslug": "openai/text-embedding-3-small", + "model_variant_slug": "openai/text-embedding-3-small", "moderation_required": true, - "name": "OpenAI | openai/o1-2024-12-17", + "name": "OpenAI | openai/text-embedding-3-small", "pricing": { - "completion": "0.00006", + "completion": "0", "discount": 0, - "image": "0.021675", - "image_output": "0", - "input_cache_read": "0.0000075", - "internal_reasoning": "0", - "prompt": "0.000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000002" }, "provider_display_name": "OpenAI", "provider_info": { @@ -122693,7 +121445,7 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "o1-2024-12-17", + "provider_model_id": "text-embedding-3-small", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", @@ -122703,56 +121455,67 @@ "max_tokens", "response_format", "structured_outputs", - "tools", - "tool_choice" + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "logit_bias", + "logprobs", + "top_logprobs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", - "has_text_output": true, + "group": "Other", + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o1", - "output_modalities": ["text"], - "permaslug": "openai/o1-2024-12-17", + "name": "OpenAI: Text Embedding 3 Small", + "output_modalities": ["embeddings"], + "permaslug": "openai/text-embedding-3-small", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o1", - "slug": "openai/o1", + "short_name": "Text Embedding 3 Small", + "slug": "openai/text-embedding-3-small", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 200000, - "created_at": "2025-03-19T22:26:51.610039+00:00", - "default_parameters": {}, + "context_length": 8192, + "created_at": "2025-10-30T23:09:58.907998+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o1-pro model uses more compute to think harder and provide consistently better answers.", + "description": "text-embedding-ada-002 is OpenAI's legacy text embedding model.", "endpoint": { "adapter_name": "OpenAIResponsesAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", @@ -122762,7 +121525,7 @@ "training": false }, "features": { - "supports_file_urls": true, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -122772,7 +121535,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "046ae30d-fe99-44b4-b020-21127e4342c7", + "id": "1a2e0fcd-fb63-48b8-8257-5d2891aa8d25", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -122781,57 +121544,62 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 200000, - "created_at": "2025-03-19T22:26:51.610039+00:00", - "default_parameters": {}, + "context_length": 8192, + "created_at": "2025-10-30T23:09:58.907998+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o1-pro model uses more compute to think harder and provide consistently better answers.", + "description": "text-embedding-ada-002 is OpenAI's legacy text embedding model.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", - "has_text_output": true, + "group": "Other", + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o1-pro", - "output_modalities": ["text"], - "permaslug": "openai/o1-pro", + "name": "OpenAI: Text Embedding Ada 002", + "output_modalities": ["embeddings"], + "permaslug": "openai/text-embedding-ada-002", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o1-pro", - "slug": "openai/o1-pro", + "short_name": "Text Embedding Ada 002", + "slug": "openai/text-embedding-ada-002", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/o1-pro", - "model_variant_slug": "openai/o1-pro", + "model_variant_permaslug": "openai/text-embedding-ada-002", + "model_variant_slug": "openai/text-embedding-ada-002", "moderation_required": true, - "name": "OpenAI | openai/o1-pro", + "name": "OpenAI | openai/text-embedding-ada-002", "pricing": { - "completion": "0.0006", + "completion": "0", "discount": 0, - "image": "0.21675", + "image": "0", "image_output": "0", "internal_reasoning": "0", - "prompt": "0.00015", + "prompt": "0.0000001", "request": "0", "web_search": "0" }, @@ -122866,611 +121634,598 @@ "slug": "openai", "statusPageUrl": "https://status.openai.com/" }, - "provider_model_id": "o1-pro", + "provider_model_id": "text-embedding-ada-002", "provider_name": "OpenAI", "provider_region": null, "provider_slug": "openai", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "seed", "max_tokens", "response_format", - "structured_outputs" + "structured_outputs", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "logit_bias", + "logprobs", + "top_logprobs" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", - "has_text_output": true, + "group": "Other", + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o1-pro", - "output_modalities": ["text"], - "permaslug": "openai/o1-pro", + "name": "OpenAI: Text Embedding Ada 002", + "output_modalities": ["embeddings"], + "permaslug": "openai/text-embedding-ada-002", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o1-pro", - "slug": "openai/o1-pro", + "short_name": "Text Embedding Ada 002", + "slug": "openai/text-embedding-ada-002", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - }, + } + ], + "name": "OpenAI", + "slug": "openai" + }, + { + "dataPolicy": { + "canPublish": true, + "retainsPrompts": true, + "training": true + }, + "displayName": "OpenInference", + "headquarters": "US", + "icon": { + "className": "invert dark:invert-0", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://openinference.xyz/&size=256" + }, + "models": [ { - "author": "openai", - "context_length": 200000, - "created_at": "2025-04-16T17:10:57.049467+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "google", + "context_length": 128000, + "created_at": "2025-03-12T05:12:39.645813+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["", "", ""], "default_system": null, - "description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images. ", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "OpenInferenceAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 128000, "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, + "canPublish": true, + "privacyPolicyURL": "https://www.openinference.xyz/terms", "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", - "training": false + "termsOfServiceURL": "https://www.openinference.xyz/terms", + "training": true }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_file_urls": true, - "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": true, - "id": "42e72619-d01c-411c-a201-f991644768b7", + "has_completions": false, + "id": "0b75e15f-86bd-4df3-9d33-b79cf27ff5c0", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 200000, - "created_at": "2025-04-16T17:10:57.049467+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "google", + "context_length": 131072, + "created_at": "2025-03-12T05:12:39.645813+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["", "", ""], "default_system": null, - "description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following. Use it to think through multi-step problems that involve analysis across text, code, and images. ", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Gemini", "has_text_output": true, - "hf_slug": null, + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], - "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: o3", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "openai/o3-2025-04-16", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o3", - "slug": "openai/o3", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/o3-2025-04-16", - "model_variant_slug": "openai/o3", + "model_variant_permaslug": "google/gemma-3-27b-it:free", + "model_variant_slug": "google/gemma-3-27b-it:free", "moderation_required": true, - "name": "OpenAI | openai/o3-2025-04-16", + "name": "OpenInference | google/gemma-3-27b-it:free", "pricing": { - "completion": "0.000008", + "completion": "0", "discount": 0, - "image": "0.00153", - "image_output": "0", - "input_cache_read": "0.0000005", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0.01" + "prompt": "0" }, - "provider_display_name": "OpenAI", + "provider_display_name": "OpenInference", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "OpenInferenceAdapter", + "baseUrl": "https://openinference.ngrok.io/v1", "byokEnabled": true, "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, + "canPublish": true, + "privacyPolicyURL": "https://www.openinference.xyz/terms", "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", - "training": false + "termsOfServiceURL": "https://www.openinference.xyz/terms", + "training": true }, - "displayName": "OpenAI", + "displayName": "OpenInference", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "className": "invert dark:invert-0", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://openinference.xyz/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "meta-llama/Llama-3.1-8B-Instruct", + "meta-llama/Llama-Guard-3-8B", + "Qwen/Qwen3-4B-Thinking-2507", + "openai/gpt-oss-120b-test", + "openai/gpt-oss-20b-test" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": true, - "name": "OpenAI", + "name": "OpenInference", "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "slug": "open-inference", + "statusPageUrl": null }, - "provider_model_id": "o3-2025-04-16", - "provider_name": "OpenAI", + "provider_model_id": "google/gemma-3-27b-it", + "provider_name": "OpenInference", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "open-inference/int8", + "quantization": "int8", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", - "seed", + "temperature", "max_tokens", + "stop", + "seed", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Gemini", "has_text_output": true, - "hf_slug": null, + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], - "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: o3", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B (free)", "output_modalities": ["text"], - "permaslug": "openai/o3-2025-04-16", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o3", - "slug": "openai/o3", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Gemma 3 27B (free)", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 200000, - "created_at": "2025-10-10T20:54:21.971349+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 128000, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "OpenInferenceAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 128000, "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, + "canPublish": true, + "privacyPolicyURL": "https://www.openinference.xyz/terms", "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", - "training": false + "termsOfServiceURL": "https://www.openinference.xyz/terms", + "training": true }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_file_urls": true, - "supports_input_audio": false, - "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": true, - "id": "a697c6df-cbb4-4a6b-8b1e-04e92f00146a", + "has_completions": false, + "id": "cb31c2e3-321d-4768-9231-2a9c764b4ef8", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 200000, - "created_at": "2025-10-10T20:54:21.971349+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "GPT", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], - "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: o3 Deep Research", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "openai/o3-deep-research-2025-06-26", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "o3 Deep Research", - "slug": "openai/o3-deep-research", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/o3-deep-research-2025-06-26", - "model_variant_slug": "openai/o3-deep-research", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct:free", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct:free", "moderation_required": true, - "name": "OpenAI | openai/o3-deep-research-2025-06-26", + "name": "OpenInference | meta-llama/llama-3.3-70b-instruct:free", "pricing": { - "completion": "0.00004", + "completion": "0", "discount": 0, - "image": "0.00765", - "image_output": "0", - "input_cache_read": "0.0000025", - "internal_reasoning": "0", - "prompt": "0.00001", - "request": "0", - "web_search": "0.01" + "prompt": "0" }, - "provider_display_name": "OpenAI", + "provider_display_name": "OpenInference", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "OpenInferenceAdapter", + "baseUrl": "https://openinference.ngrok.io/v1", "byokEnabled": true, "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, + "canPublish": true, + "privacyPolicyURL": "https://www.openinference.xyz/terms", "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", - "training": false + "termsOfServiceURL": "https://www.openinference.xyz/terms", + "training": true }, - "displayName": "OpenAI", + "displayName": "OpenInference", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "className": "invert dark:invert-0", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://openinference.xyz/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "meta-llama/Llama-3.1-8B-Instruct", + "meta-llama/Llama-Guard-3-8B", + "Qwen/Qwen3-4B-Thinking-2507", + "openai/gpt-oss-120b-test", + "openai/gpt-oss-20b-test" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": true, - "name": "OpenAI", + "name": "OpenInference", "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "slug": "open-inference", + "statusPageUrl": null }, - "provider_model_id": "o3-deep-research-2025-06-26", - "provider_name": "OpenAI", + "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct", + "provider_name": "OpenInference", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "open-inference/int8", + "quantization": "int8", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", - "seed", - "max_tokens", "temperature", - "top_p", + "max_tokens", "stop", - "frequency_penalty", - "presence_penalty", - "logit_bias", - "logprobs", - "top_logprobs", + "seed", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } + "variant": "free" }, - "group": "GPT", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], - "instruct_type": null, - "model_version_group_id": null, - "name": "OpenAI: o3 Deep Research", + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct (free)", "output_modalities": ["text"], - "permaslug": "openai/o3-deep-research-2025-06-26", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "o3 Deep Research", - "slug": "openai/o3-deep-research", + "short_name": "Llama 3.3 70B Instruct (free)", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "openai", - "context_length": 200000, - "created_at": "2025-01-31T19:28:41.132151+00:00", - "default_parameters": {}, + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.\n\nThis model supports the `reasoning_effort` parameter, which can be set to \"high\", \"medium\", or \"low\" to control the thinking time of the model. The default is \"medium\". OpenRouter also offers the model slug `openai/o3-mini-high` to default the parameter to \"high\".\n\nThe model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "OpenInferenceAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 131072, "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, + "canPublish": true, + "privacyPolicyURL": "https://www.openinference.xyz/terms", "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", - "training": false + "termsOfServiceURL": "https://www.openinference.xyz/terms", + "training": true }, "features": { - "supports_file_urls": true, + "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "e93c942e-7f8f-410d-8478-21ec37bc6b0d", + "id": "447dca36-a2eb-4626-8675-bddd7c74a615", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 200000, - "created_at": "2025-01-31T19:28:41.132151+00:00", - "default_parameters": {}, + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding.\n\nThis model supports the `reasoning_effort` parameter, which can be set to \"high\", \"medium\", or \"low\" to control the thinking time of the model. The default is \"medium\". OpenRouter also offers the model slug `openai/o3-mini-high` to default the parameter to \"high\".\n\nThe model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o3 Mini", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "openai/o3-mini-2025-01-31", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "o3 Mini", - "slug": "openai/o3-mini", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/o3-mini-2025-01-31", - "model_variant_slug": "openai/o3-mini", + "model_variant_permaslug": "openai/gpt-oss-120b:free", + "model_variant_slug": "openai/gpt-oss-120b:free", "moderation_required": true, - "name": "OpenAI | openai/o3-mini-2025-01-31", + "name": "OpenInference | openai/gpt-oss-120b:free", "pricing": { - "completion": "0.0000044", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000055", - "internal_reasoning": "0", - "prompt": "0.0000011", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "OpenAI", + "provider_display_name": "OpenInference", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "OpenInferenceAdapter", + "baseUrl": "https://openinference.ngrok.io/v1", "byokEnabled": true, "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, + "canPublish": true, + "privacyPolicyURL": "https://www.openinference.xyz/terms", "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", - "training": false + "termsOfServiceURL": "https://www.openinference.xyz/terms", + "training": true }, - "displayName": "OpenAI", + "displayName": "OpenInference", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "className": "invert dark:invert-0", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://openinference.xyz/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "meta-llama/Llama-3.1-8B-Instruct", + "meta-llama/Llama-Guard-3-8B", + "Qwen/Qwen3-4B-Thinking-2507", + "openai/gpt-oss-120b-test", + "openai/gpt-oss-20b-test" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": true, - "name": "OpenAI", + "name": "OpenInference", "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "slug": "open-inference", + "statusPageUrl": null }, - "provider_model_id": "o3-mini-2025-01-31", - "provider_name": "OpenAI", + "provider_model_id": "openai/gpt-oss-120b", + "provider_name": "OpenInference", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "open-inference/int8", + "quantization": "int8", "supported_parameters": [ - "seed", + "reasoning", + "include_reasoning", + "temperature", "max_tokens", - "response_format", - "structured_outputs", + "stop", + "seed", "tools", "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o3 Mini", + "name": "OpenAI: gpt-oss-120b (free)", "output_modalities": ["text"], - "permaslug": "openai/o3-mini-2025-01-31", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "o3 Mini", - "slug": "openai/o3-mini", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b (free)", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "openai", - "context_length": 200000, - "created_at": "2025-02-12T15:03:31.504126+00:00", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -123478,46 +122233,45 @@ }, "default_stops": [], "default_system": null, - "description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. \n\no3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "OpenInferenceAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 131072, "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, + "canPublish": true, + "privacyPolicyURL": "https://www.openinference.xyz/terms", "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", - "training": false + "termsOfServiceURL": "https://www.openinference.xyz/terms", + "training": true }, "features": { - "supports_file_urls": true, + "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "4a8663f2-89f3-40e9-9b50-e4838fff0155", + "id": "3be5b183-38c9-4c00-a32f-1bf267cbe924", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "openai", - "context_length": 200000, - "created_at": "2025-02-12T15:03:31.504126+00:00", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -123525,7 +122279,7 @@ }, "default_stops": [], "default_system": null, - "description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. \n\no3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. The model features three adjustable reasoning effort levels and supports key developer capabilities including function calling, structured outputs, and streaming, though it does not include vision processing capabilities.\n\nThe model demonstrates significant improvements over its predecessor, with expert testers preferring its responses 56% of the time and noting a 39% reduction in major errors on complex questions. With medium reasoning effort settings, o3-mini matches the performance of the larger o1 model on challenging reasoning evaluations like AIME and GPQA, while maintaining lower latency and cost.", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "features": { "chat_template_config": { "should_hoist_and_merge_system_messages": null @@ -123538,90 +122292,91 @@ }, "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o3 Mini High", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "openai/o3-mini-high-2025-01-31", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o3 Mini High", - "slug": "openai/o3-mini-high", - "updated_at": "2026-01-08T00:52:39.57688+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/o3-mini-high-2025-01-31", - "model_variant_slug": "openai/o3-mini-high", + "model_variant_permaslug": "openai/gpt-oss-20b:free", + "model_variant_slug": "openai/gpt-oss-20b:free", "moderation_required": true, - "name": "OpenAI | openai/o3-mini-high-2025-01-31", + "name": "OpenInference | openai/gpt-oss-20b:free", "pricing": { - "completion": "0.0000044", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000055", - "internal_reasoning": "0", - "prompt": "0.0000011", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "OpenAI", + "provider_display_name": "OpenInference", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "OpenInferenceAdapter", + "baseUrl": "https://openinference.ngrok.io/v1", "byokEnabled": true, "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, + "canPublish": true, + "privacyPolicyURL": "https://www.openinference.xyz/terms", "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", - "training": false + "termsOfServiceURL": "https://www.openinference.xyz/terms", + "training": true }, - "displayName": "OpenAI", + "displayName": "OpenInference", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "className": "invert dark:invert-0", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://openinference.xyz/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "meta-llama/Llama-3.1-8B-Instruct", + "meta-llama/Llama-Guard-3-8B", + "Qwen/Qwen3-4B-Thinking-2507", + "openai/gpt-oss-120b-test", + "openai/gpt-oss-20b-test" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": true, - "name": "OpenAI", + "name": "OpenInference", "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "slug": "open-inference", + "statusPageUrl": null }, - "provider_model_id": "o3-mini-2025-01-31", - "provider_name": "OpenAI", + "provider_model_id": "openai/gpt-oss-20b", + "provider_name": "OpenInference", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "open-inference/int8", + "quantization": "int8", "supported_parameters": [ - "seed", + "reasoning", + "include_reasoning", + "temperature", "max_tokens", - "response_format", - "structured_outputs", - "tools", - "tool_choice" + "stop", + "seed", + "tool_choice", + "tools" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { "chat_template_config": { @@ -123635,63 +122390,79 @@ }, "group": "GPT", "has_text_output": true, - "hf_slug": null, + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o3 Mini High", + "name": "OpenAI: gpt-oss-20b (free)", "output_modalities": ["text"], - "permaslug": "openai/o3-mini-high-2025-01-31", + "permaslug": "openai/gpt-oss-20b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o3 Mini High", - "slug": "openai/o3-mini-high", - "updated_at": "2026-01-08T00:52:39.57688+00:00", + "short_name": "gpt-oss-20b (free)", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null - }, + } + ], + "name": "OpenInference", + "slug": "open-inference" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Parasail", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" + }, + "models": [ { - "author": "openai", - "context_length": 200000, - "created_at": "2025-06-10T23:32:32.266087+00:00", - "default_parameters": {}, + "author": "allenai", + "context_length": 36864, + "created_at": "2026-01-09T22:11:12.589713+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.\n\nNote that BYOK is required for this model. Set up here: https://openrouter.ai/settings/integrations", + "description": "Molmo2-8B is an open vision-language model developed by the Allen Institute for AI (Ai2) as part of the Molmo2 family, supporting image, video, and multi-image understanding and grounding. It is based on Qwen3-8B and uses SigLIP 2 as its vision backbone, outperforming other open-weight, open-data models on short videos, counting, and captioning, while remaining competitive on long-video tasks.", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 36864, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_file_urls": true, - "supports_native_web_search": true, + "supports_base64_video_input": true, + "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, "literal_required": true, "type_function": true - } + }, + "supports_video_urls": true }, "has_chat_completions": true, "has_completions": true, - "id": "b8222376-66ee-4b89-a7c9-e627ba35db79", + "id": "cf9ded82-8ba1-4251-a6cb-ccde51315098", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -123700,168 +122471,171 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": 36864, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 200000, - "created_at": "2025-06-10T23:32:32.266087+00:00", - "default_parameters": {}, + "author": "allenai", + "context_length": 36864, + "created_at": "2026-01-09T22:11:12.589713+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently better answers.\n\nNote that BYOK is required for this model. Set up here: https://openrouter.ai/settings/integrations", + "description": "Molmo2-8B is an open vision-language model developed by the Allen Institute for AI (Ai2) as part of the Molmo2 family, supporting image, video, and multi-image understanding and grounding. It is based on Qwen3-8B and uses SigLIP 2 as its vision backbone, outperforming other open-weight, open-data models on short videos, counting, and captioning, while remaining competitive on long-video tasks.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "allenai/Molmo2-8B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "file", "image"], + "input_modalities": ["text", "image", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o3 Pro", + "name": "AllenAI: Molmo2 8B", "output_modalities": ["text"], - "permaslug": "openai/o3-pro-2025-06-10", + "permaslug": "allenai/molmo-2-8b-20260109", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "o3 Pro", - "slug": "openai/o3-pro", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": "OpenAI requires bringing your own API key to use o3-pro over the API. Set up here: https://openrouter.ai/settings/integrations" + "short_name": "Molmo2 8B", + "slug": "allenai/molmo-2-8b", + "updated_at": "2026-02-04T19:13:22.202564+00:00", + "warning_message": null }, - "model_variant_permaslug": "openai/o3-pro-2025-06-10", - "model_variant_slug": "openai/o3-pro", - "moderation_required": true, - "name": "OpenAI | openai/o3-pro-2025-06-10", + "model_variant_permaslug": "allenai/molmo-2-8b-20260109", + "model_variant_slug": "allenai/molmo-2-8b", + "moderation_required": false, + "name": "Parasail | allenai/molmo-2-8b-20260109", "pricing": { - "completion": "0.00008", + "completion": "0.0000002", "discount": 0, - "image": "0.0153", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00002", - "request": "0", - "web_search": "0.01" + "prompt": "0.0000002" }, - "provider_display_name": "OpenAI", + "provider_display_name": "Parasail", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "ParasailAdapter", + "baseUrl": "https://api.parasail.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, - "displayName": "OpenAI", + "displayName": "Parasail", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "parasail-olmo-2-1124-7b-instruct", + "parasail-qwen3-omni-30b-a3b-thinking", + "parasail-qwen3-omni-30b-a3b-instruct", + "parasail-dots-ocr", + "parasail-auto-glm-9b-multilingual" + ], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": true, - "name": "OpenAI", - "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "moderationRequired": false, + "name": "Parasail", + "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], + "slug": "parasail", + "statusPageUrl": null }, - "provider_model_id": "o3-pro-2025-06-10", - "provider_name": "OpenAI", + "provider_model_id": "parasail-molmo2-8b", + "provider_name": "Parasail", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "parasail/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", - "seed", "max_tokens", - "tools", - "tool_choice" + "temperature", + "top_p", + "frequency_penalty", + "presence_penalty", + "repetition_penalty", + "seed", + "stop", + "top_k", + "logit_bias" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "allenai/Molmo2-8B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "file", "image"], + "input_modalities": ["text", "image", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o3 Pro", + "name": "AllenAI: Molmo2 8B", "output_modalities": ["text"], - "permaslug": "openai/o3-pro-2025-06-10", + "permaslug": "allenai/molmo-2-8b-20260109", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "o3 Pro", - "slug": "openai/o3-pro", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": "OpenAI requires bringing your own API key to use o3-pro over the API. Set up here: https://openrouter.ai/settings/integrations" + "short_name": "Molmo2 8B", + "slug": "allenai/molmo-2-8b", + "updated_at": "2026-02-04T19:13:22.202564+00:00", + "warning_message": null }, { - "author": "openai", - "context_length": 200000, - "created_at": "2025-04-16T16:29:02.980764+00:00", - "default_parameters": {}, + "author": "allenai", + "context_length": 65536, + "created_at": "2025-11-21T20:51:16.304522+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", + "description": "Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and highly nuanced conversational reasoning. Developed by Ai2 under the Apache 2.0 license, Olmo 3 32B Think embodies the Olmo initiative’s commitment to openness, offering full transparency across weights, code and training methodology.", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 65536, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_file_urls": true, - "supports_native_web_search": true, + "disable_free_endpoint_limits": true, + "is_mandatory_reasoning": true, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -123871,183 +122645,186 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "bd121898-b27c-4e2c-bc92-278627465a54", + "id": "7282df05-53d5-4f61-9104-05a2ea109ffa", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 50, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 200000, - "created_at": "2025-04-16T16:29:02.980764+00:00", - "default_parameters": {}, + "author": "allenai", + "context_length": 65536, + "created_at": "2025-11-21T20:51:16.304522+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.6, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", + "description": "Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and highly nuanced conversational reasoning. Developed by Ai2 under the Apache 2.0 license, Olmo 3 32B Think embodies the Olmo initiative’s commitment to openness, offering full transparency across weights, code and training methodology.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "allenai/Olmo-3-32B-Think", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o4 Mini", + "name": "AllenAI: Olmo 3 32B Think", "output_modalities": ["text"], - "permaslug": "openai/o4-mini-2025-04-16", + "permaslug": "allenai/olmo-3-32b-think-20251121", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "o4 Mini", - "slug": "openai/o4-mini", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Olmo 3 32B Think", + "slug": "allenai/olmo-3-32b-think", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/o4-mini-2025-04-16", - "model_variant_slug": "openai/o4-mini", - "moderation_required": true, - "name": "OpenAI | openai/o4-mini-2025-04-16", + "model_variant_permaslug": "allenai/olmo-3-32b-think-20251121", + "model_variant_slug": "allenai/olmo-3-32b-think", + "moderation_required": false, + "name": "Parasail | allenai/olmo-3-32b-think-20251121", "pricing": { - "completion": "0.0000044", + "completion": "0.0000005", "discount": 0, - "image": "0.0008415", - "image_output": "0", - "input_cache_read": "0.000000275", - "internal_reasoning": "0", - "prompt": "0.0000011", - "request": "0", - "web_search": "0.01" + "prompt": "0.00000015" }, - "provider_display_name": "OpenAI", + "provider_display_name": "Parasail", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "ParasailAdapter", + "baseUrl": "https://api.parasail.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, - "displayName": "OpenAI", + "displayName": "Parasail", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "parasail-olmo-2-1124-7b-instruct", + "parasail-qwen3-omni-30b-a3b-thinking", + "parasail-qwen3-omni-30b-a3b-instruct", + "parasail-dots-ocr", + "parasail-auto-glm-9b-multilingual" + ], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": true, - "name": "OpenAI", - "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "moderationRequired": false, + "name": "Parasail", + "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], + "slug": "parasail", + "statusPageUrl": null }, - "provider_model_id": "o4-mini-2025-04-16", - "provider_name": "OpenAI", + "provider_model_id": "parasail-olmo-3-32b-think", + "provider_name": "Parasail", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "parasail/bf16", + "quantization": "bf16", "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", - "seed", "max_tokens", - "tools", - "tool_choice" + "temperature", + "top_p", + "frequency_penalty", + "presence_penalty", + "repetition_penalty", + "seed", + "stop", + "top_k", + "logit_bias", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "allenai/Olmo-3-32B-Think", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o4 Mini", + "name": "AllenAI: Olmo 3 32B Think", "output_modalities": ["text"], - "permaslug": "openai/o4-mini-2025-04-16", + "permaslug": "allenai/olmo-3-32b-think-20251121", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "o4 Mini", - "slug": "openai/o4-mini", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Olmo 3 32B Think", + "slug": "allenai/olmo-3-32b-think", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 200000, - "created_at": "2025-10-10T20:54:02.725273+00:00", + "author": "allenai", + "context_length": 65536, + "created_at": "2025-11-21T20:51:13.585428+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.6, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", + "description": "Olmo 3 7B Instruct is a supervised instruction-fine-tuned variant of the Olmo 3 7B base model, optimized for instruction-following, question-answering, and natural conversational dialogue. By leveraging high-quality instruction data and an open training pipeline, it delivers strong performance across everyday NLP tasks while remaining accessible and easy to integrate. Developed by Ai2 under the Apache 2.0 license, the model offers a transparent, community-friendly option for instruction-driven applications.", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 65536, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_file_urls": true, "supports_input_audio": false, - "supports_native_web_search": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -124057,7 +122834,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "97f9f7c3-9e55-4f6d-8de4-459d4f23e2a9", + "id": "3dfd1d32-ce94-4ed2-bfd8-0de2e0371646", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -124066,185 +122843,176 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 200000, - "created_at": "2025-10-10T20:54:02.725273+00:00", + "author": "allenai", + "context_length": 65536, + "created_at": "2025-11-21T20:51:13.585428+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.6, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.", + "description": "Olmo 3 7B Instruct is a supervised instruction-fine-tuned variant of the Olmo 3 7B base model, optimized for instruction-following, question-answering, and natural conversational dialogue. By leveraging high-quality instruction data and an open training pipeline, it delivers strong performance across everyday NLP tasks while remaining accessible and easy to integrate. Developed by Ai2 under the Apache 2.0 license, the model offers a transparent, community-friendly option for instruction-driven applications.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "allenai/Olmo-3-7B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o4 Mini Deep Research", + "name": "AllenAI: Olmo 3 7B Instruct", "output_modalities": ["text"], - "permaslug": "openai/o4-mini-deep-research-2025-06-26", + "permaslug": "allenai/olmo-3-7b-instruct-20251121", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o4 Mini Deep Research", - "slug": "openai/o4-mini-deep-research", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Olmo 3 7B Instruct", + "slug": "allenai/olmo-3-7b-instruct", + "updated_at": "2025-11-22T00:00:24.418612+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/o4-mini-deep-research-2025-06-26", - "model_variant_slug": "openai/o4-mini-deep-research", - "moderation_required": true, - "name": "OpenAI | openai/o4-mini-deep-research-2025-06-26", + "model_variant_permaslug": "allenai/olmo-3-7b-instruct-20251121", + "model_variant_slug": "allenai/olmo-3-7b-instruct", + "moderation_required": false, + "name": "Parasail | allenai/olmo-3-7b-instruct-20251121", "pricing": { - "completion": "0.000008", + "completion": "0.0000002", "discount": 0, - "image": "0.00153", - "image_output": "0", - "input_cache_read": "0.0000005", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0.01" + "prompt": "0.0000001" }, - "provider_display_name": "OpenAI", + "provider_display_name": "Parasail", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "ParasailAdapter", + "baseUrl": "https://api.parasail.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, - "displayName": "OpenAI", + "displayName": "Parasail", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "parasail-olmo-2-1124-7b-instruct", + "parasail-qwen3-omni-30b-a3b-thinking", + "parasail-qwen3-omni-30b-a3b-instruct", + "parasail-dots-ocr", + "parasail-auto-glm-9b-multilingual" + ], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": true, - "name": "OpenAI", - "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "moderationRequired": false, + "name": "Parasail", + "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], + "slug": "parasail", + "statusPageUrl": null }, - "provider_model_id": "o4-mini-deep-research-2025-06-26", - "provider_name": "OpenAI", + "provider_model_id": "parasail-olmo-3-7b-instruct", + "provider_name": "Parasail", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "parasail/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", - "seed", "max_tokens", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", + "repetition_penalty", + "seed", + "stop", + "top_k", "logit_bias", - "logprobs", - "top_logprobs", - "tools", - "tool_choice" + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "allenai/Olmo-3-7B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["file", "image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o4 Mini Deep Research", + "name": "AllenAI: Olmo 3 7B Instruct", "output_modalities": ["text"], - "permaslug": "openai/o4-mini-deep-research-2025-06-26", + "permaslug": "allenai/olmo-3-7b-instruct-20251121", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "o4 Mini Deep Research", - "slug": "openai/o4-mini-deep-research", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Olmo 3 7B Instruct", + "slug": "allenai/olmo-3-7b-instruct", + "updated_at": "2025-11-22T00:00:24.418612+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 200000, - "created_at": "2025-04-16T17:23:32.042157+00:00", + "author": "allenai", + "context_length": 65536, + "created_at": "2025-11-21T20:51:10.334947+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.6, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. \n\nOpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", + "description": "Olmo 3 7B Think is a research-oriented language model in the Olmo family designed for advanced reasoning and instruction-driven tasks. It excels at multi-step problem solving, logical inference, and maintaining coherent conversational context. Developed by Ai2 under the Apache 2.0 license, Olmo 3 7B Think supports transparent, fully open experimentation and provides a lightweight yet capable foundation for academic research and practical NLP workflows.", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 65536, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_file_urls": true, - "supports_native_web_search": true, + "is_mandatory_reasoning": true, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -124254,7 +123022,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "60020533-2fb2-4aa1-9454-181029fd52de", + "id": "62189f7c-259a-4dc3-8e85-7769034f981c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -124263,174 +123031,178 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 100000, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 200000, - "created_at": "2025-04-16T17:23:32.042157+00:00", + "author": "allenai", + "context_length": 65536, + "created_at": "2025-11-21T20:51:10.334947+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.6, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. \n\nOpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning and coding performance across benchmarks like AIME (99.5% with Python) and SWE-bench, outperforming its predecessor o3-mini and even approaching o3 in some domains.\n\nDespite its smaller size, o4-mini exhibits high accuracy in STEM tasks, visual problem solving (e.g., MathVista, MMMU), and code editing. It is especially well-suited for high-throughput scenarios where latency or cost is critical. Thanks to its efficient architecture and refined reinforcement learning training, o4-mini can chain tools, generate structured outputs, and solve multi-step tasks with minimal delay—often in under a minute.", + "description": "Olmo 3 7B Think is a research-oriented language model in the Olmo family designed for advanced reasoning and instruction-driven tasks. It excels at multi-step problem solving, logical inference, and maintaining coherent conversational context. Developed by Ai2 under the Apache 2.0 license, Olmo 3 7B Think supports transparent, fully open experimentation and provides a lightweight yet capable foundation for academic research and practical NLP workflows.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "allenai/Olmo-3-7B-Think", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o4 Mini High", + "name": "AllenAI: Olmo 3 7B Think", "output_modalities": ["text"], - "permaslug": "openai/o4-mini-high-2025-04-16", + "permaslug": "allenai/olmo-3-7b-think-20251121", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "o4 Mini High", - "slug": "openai/o4-mini-high", - "updated_at": "2026-01-08T00:53:50.4178+00:00", + "short_name": "Olmo 3 7B Think", + "slug": "allenai/olmo-3-7b-think", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/o4-mini-high-2025-04-16", - "model_variant_slug": "openai/o4-mini-high", - "moderation_required": true, - "name": "OpenAI | openai/o4-mini-high-2025-04-16", + "model_variant_permaslug": "allenai/olmo-3-7b-think-20251121", + "model_variant_slug": "allenai/olmo-3-7b-think", + "moderation_required": false, + "name": "Parasail | allenai/olmo-3-7b-think-20251121", "pricing": { - "completion": "0.0000044", + "completion": "0.0000002", "discount": 0, - "image": "0.0008415", - "image_output": "0", - "input_cache_read": "0.000000275", - "internal_reasoning": "0", - "prompt": "0.0000011", - "request": "0", - "web_search": "0.01" + "prompt": "0.00000012" }, - "provider_display_name": "OpenAI", + "provider_display_name": "Parasail", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "ParasailAdapter", + "baseUrl": "https://api.parasail.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, - "displayName": "OpenAI", + "displayName": "Parasail", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "parasail-olmo-2-1124-7b-instruct", + "parasail-qwen3-omni-30b-a3b-thinking", + "parasail-qwen3-omni-30b-a3b-instruct", + "parasail-dots-ocr", + "parasail-auto-glm-9b-multilingual" + ], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": true, - "name": "OpenAI", - "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "moderationRequired": false, + "name": "Parasail", + "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], + "slug": "parasail", + "statusPageUrl": null }, - "provider_model_id": "o4-mini-2025-04-16", - "provider_name": "OpenAI", + "provider_model_id": "parasail-olmo-3-7b-think", + "provider_name": "Parasail", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "parasail/bf16", + "quantization": "bf16", "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", - "seed", "max_tokens", - "tools", - "tool_choice" + "temperature", + "top_p", + "frequency_penalty", + "presence_penalty", + "repetition_penalty", + "seed", + "stop", + "top_k", + "logit_bias", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "allenai/Olmo-3-7B-Think", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "file"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: o4 Mini High", + "name": "AllenAI: Olmo 3 7B Think", "output_modalities": ["text"], - "permaslug": "openai/o4-mini-high-2025-04-16", + "permaslug": "allenai/olmo-3-7b-think-20251121", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "o4 Mini High", - "slug": "openai/o4-mini-high", - "updated_at": "2026-01-08T00:53:50.4178+00:00", + "short_name": "Olmo 3 7B Think", + "slug": "allenai/olmo-3-7b-think", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 8192, - "created_at": "2025-10-30T22:21:06.503374+00:00", + "author": "allenai", + "context_length": 65536, + "created_at": "2025-12-16T17:55:19+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.6, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "text-embedding-3-large is OpenAI's most capable embedding model for both english and non-english tasks. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", + "description": "Olmo 3.1 32B Think is a large-scale, 32-billion-parameter model designed for deep reasoning, complex multi-step logic, and advanced instruction following. Building on the Olmo 3 series, version 3.1 delivers refined reasoning behavior and stronger performance across demanding evaluations and nuanced conversational tasks. Developed by Ai2 under the Apache 2.0 license, Olmo 3.1 32B Think continues the Olmo initiative’s commitment to openness, providing full transparency across model weights, code, and training methodology.", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 65536, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, "features": { - "supports_input_audio": false, + "disable_free_endpoint_limits": true, + "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -124440,126 +123212,127 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8083d8ef-5e78-4124-8536-f65ba99e2a8a", + "id": "73409d73-26ee-4ef9-9055-aee54589583f", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 50, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 65536, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 8192, - "created_at": "2025-10-30T22:21:06.503374+00:00", + "author": "allenai", + "context_length": 65536, + "created_at": "2025-12-16T17:55:19+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.6, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "text-embedding-3-large is OpenAI's most capable embedding model for both english and non-english tasks. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", + "description": "Olmo 3.1 32B Think is a large-scale, 32-billion-parameter model designed for deep reasoning, complex multi-step logic, and advanced instruction following. Building on the Olmo 3 series, version 3.1 delivers refined reasoning behavior and stronger performance across demanding evaluations and nuanced conversational tasks. Developed by Ai2 under the Apache 2.0 license, Olmo 3.1 32B Think continues the Olmo initiative’s commitment to openness, providing full transparency across model weights, code, and training methodology.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", - "has_text_output": false, - "hf_slug": null, + "has_text_output": true, + "hf_slug": "allenai/Olmo-3.1-32B-Think", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: Text Embedding 3 Large", - "output_modalities": ["embeddings"], - "permaslug": "openai/text-embedding-3-large", + "name": "AllenAI: Olmo 3.1 32B Think", + "output_modalities": ["text"], + "permaslug": "allenai/olmo-3.1-32b-think-20251215", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Text Embedding 3 Large", - "slug": "openai/text-embedding-3-large", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Olmo 3.1 32B Think", + "slug": "allenai/olmo-3.1-32b-think", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/text-embedding-3-large", - "model_variant_slug": "openai/text-embedding-3-large", - "moderation_required": true, - "name": "OpenAI | openai/text-embedding-3-large", + "model_variant_permaslug": "allenai/olmo-3.1-32b-think-20251215", + "model_variant_slug": "allenai/olmo-3.1-32b-think", + "moderation_required": false, + "name": "Parasail | allenai/olmo-3.1-32b-think-20251215", "pricing": { - "completion": "0", + "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000013", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, - "provider_display_name": "OpenAI", + "provider_display_name": "Parasail", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "ParasailAdapter", + "baseUrl": "https://api.parasail.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, - "displayName": "OpenAI", + "displayName": "Parasail", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "parasail-olmo-2-1124-7b-instruct", + "parasail-qwen3-omni-30b-a3b-thinking", + "parasail-qwen3-omni-30b-a3b-instruct", + "parasail-dots-ocr", + "parasail-auto-glm-9b-multilingual" + ], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": true, - "name": "OpenAI", - "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "moderationRequired": false, + "name": "Parasail", + "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], + "slug": "parasail", + "statusPageUrl": null }, - "provider_model_id": "text-embedding-3-large", - "provider_name": "OpenAI", + "provider_model_id": "parasail-olmo-31-32b-think", + "provider_name": "Parasail", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "parasail/bf16", + "quantization": "bf16", "supported_parameters": [ - "seed", + "reasoning", + "include_reasoning", "max_tokens", - "response_format", - "structured_outputs", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", + "repetition_penalty", + "seed", + "stop", + "top_k", "logit_bias", - "logprobs", - "top_logprobs" + "structured_outputs", + "response_format" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" @@ -124567,55 +123340,50 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", - "has_text_output": false, - "hf_slug": null, + "has_text_output": true, + "hf_slug": "allenai/Olmo-3.1-32B-Think", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: Text Embedding 3 Large", - "output_modalities": ["embeddings"], - "permaslug": "openai/text-embedding-3-large", + "name": "AllenAI: Olmo 3.1 32B Think", + "output_modalities": ["text"], + "permaslug": "allenai/olmo-3.1-32b-think-20251215", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Text Embedding 3 Large", - "slug": "openai/text-embedding-3-large", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Olmo 3.1 32B Think", + "slug": "allenai/olmo-3.1-32b-think", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 8192, - "created_at": "2025-10-30T20:50:55+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "bytedance", + "context_length": 128000, + "created_at": "2025-07-22T17:24:16.94785+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": " text-embedding-3-small is OpenAI's improved, more performant version of the ada embedding model. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", + "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement learning-based reasoning, enabling robust action planning and execution across virtual interfaces.\n\nThis model achieves state-of-the-art results on a range of interactive and grounding benchmarks, including OSworld, WebVoyager, AndroidWorld, and ScreenSpot. It also demonstrates perfect task completion across diverse Poki games and outperforms prior models in Minecraft agent tasks. UI-TARS-1.5 supports thought decomposition during inference and shows strong scaling across variants, with the 1.5 version notably exceeding the performance of earlier 72B and 7B checkpoints.", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, "features": { @@ -124629,7 +123397,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "d88ee4ad-6cb6-4b9e-b84c-5ca8a4c58e58", + "id": "22a474a1-cb4e-42d2-b19a-c1be103f5abd", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -124638,23 +123406,18 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 8192, - "created_at": "2025-10-30T20:50:55+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "bytedance", + "context_length": 128000, + "created_at": "2025-07-22T17:24:16.94785+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": " text-embedding-3-small is OpenAI's improved, more performant version of the ada embedding model. Embeddings are a numerical representation of text that can be used to measure the relatedness between two pieces of text. Embeddings are useful for search, clustering, recommendations, anomaly detection, and classification tasks.", + "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement learning-based reasoning, enabling robust action planning and execution across virtual interfaces.\n\nThis model achieves state-of-the-art results on a range of interactive and grounding benchmarks, including OSworld, WebVoyager, AndroidWorld, and ScreenSpot. It also demonstrates perfect task completion across diverse Poki games and outperforms prior models in Minecraft agent tasks. UI-TARS-1.5 supports thought decomposition during inference and shows strong scaling across variants, with the 1.5 version notably exceeding the performance of earlier 72B and 7B checkpoints.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -124662,90 +123425,87 @@ } }, "group": "Other", - "has_text_output": false, - "hf_slug": null, + "has_text_output": true, + "hf_slug": "ByteDance-Seed/UI-TARS-1.5-7B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: Text Embedding 3 Small", - "output_modalities": ["embeddings"], - "permaslug": "openai/text-embedding-3-small", + "name": "ByteDance: UI-TARS 7B ", + "output_modalities": ["text"], + "permaslug": "bytedance/ui-tars-1.5-7b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Text Embedding 3 Small", - "slug": "openai/text-embedding-3-small", + "short_name": "UI-TARS 7B ", + "slug": "bytedance/ui-tars-1.5-7b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/text-embedding-3-small", - "model_variant_slug": "openai/text-embedding-3-small", - "moderation_required": true, - "name": "OpenAI | openai/text-embedding-3-small", + "model_variant_permaslug": "bytedance/ui-tars-1.5-7b", + "model_variant_slug": "bytedance/ui-tars-1.5-7b", + "moderation_required": false, + "name": "Parasail | bytedance/ui-tars-1.5-7b", "pricing": { - "completion": "0", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, - "provider_display_name": "OpenAI", + "provider_display_name": "Parasail", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "ParasailAdapter", + "baseUrl": "https://api.parasail.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, - "displayName": "OpenAI", + "displayName": "Parasail", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "parasail-olmo-2-1124-7b-instruct", + "parasail-qwen3-omni-30b-a3b-thinking", + "parasail-qwen3-omni-30b-a3b-instruct", + "parasail-dots-ocr", + "parasail-auto-glm-9b-multilingual" + ], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": true, - "name": "OpenAI", - "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "moderationRequired": false, + "name": "Parasail", + "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], + "slug": "parasail", + "statusPageUrl": null }, - "provider_model_id": "text-embedding-3-small", - "provider_name": "OpenAI", + "provider_model_id": "parasail-ui-tars-1p5-7b", + "provider_name": "Parasail", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "parasail/bf16", + "quantization": "bf16", "supported_parameters": [ - "seed", "max_tokens", - "response_format", - "structured_outputs", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", - "logit_bias", - "logprobs", - "top_logprobs" + "repetition_penalty", + "seed", + "stop", + "top_k", + "logit_bias" ], "supports_multipart": true, "supports_reasoning": false, @@ -124754,7 +123514,6 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -124762,63 +123521,62 @@ } }, "group": "Other", - "has_text_output": false, - "hf_slug": null, + "has_text_output": true, + "hf_slug": "ByteDance-Seed/UI-TARS-1.5-7B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: Text Embedding 3 Small", - "output_modalities": ["embeddings"], - "permaslug": "openai/text-embedding-3-small", + "name": "ByteDance: UI-TARS 7B ", + "output_modalities": ["text"], + "permaslug": "bytedance/ui-tars-1.5-7b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Text Embedding 3 Small", - "slug": "openai/text-embedding-3-small", + "short_name": "UI-TARS 7B ", + "slug": "bytedance/ui-tars-1.5-7b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 8192, - "created_at": "2025-10-30T23:09:58.907998+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-12-01T13:10:42.818885+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "text-embedding-ada-002 is OpenAI's legacy text embedding model.", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "endpoint": { - "adapter_name": "OpenAIResponsesAdapter", + "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 163840, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, "features": { - "supports_input_audio": false, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "1a2e0fcd-fb63-48b8-8257-5d2891aa8d25", + "id": "4c18a672-9069-469d-9e6b-8af2300fe7b7", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -124827,117 +123585,118 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 163840, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 8192, - "created_at": "2025-10-30T23:09:58.907998+00:00", + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-12-01T13:10:42.818885+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "text-embedding-ada-002 is OpenAI's legacy text embedding model.", + "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", - "has_text_output": false, - "hf_slug": null, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: Text Embedding Ada 002", - "output_modalities": ["embeddings"], - "permaslug": "openai/text-embedding-ada-002", + "name": "DeepSeek: DeepSeek V3.2", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-v3.2-20251201", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Text Embedding Ada 002", - "slug": "openai/text-embedding-ada-002", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/text-embedding-ada-002", - "model_variant_slug": "openai/text-embedding-ada-002", - "moderation_required": true, - "name": "OpenAI | openai/text-embedding-ada-002", + "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", + "model_variant_slug": "deepseek/deepseek-v3.2", + "moderation_required": false, + "name": "Parasail | deepseek/deepseek-v3.2-20251201", "pricing": { - "completion": "0", + "completion": "0.00000045", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000028" }, - "provider_display_name": "OpenAI", + "provider_display_name": "Parasail", "provider_info": { - "adapterName": "OpenAIResponsesAdapter", - "baseUrl": "https://api.openai.com/v1", + "adapterName": "ParasailAdapter", + "baseUrl": "https://api.parasail.io/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://openai.com/policies/privacy-policy/", - "requiresUserIDs": true, - "retainsPrompts": true, - "termsOfServiceURL": "https://openai.com/policies/row-terms-of-use/", + "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.parasail.io/legal/terms", "training": false }, - "displayName": "OpenAI", + "displayName": "Parasail", "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, "headquarters": "US", "icon": { - "className": "invert-0 dark:invert", - "url": "/images/icons/OpenAI.svg" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "parasail-olmo-2-1124-7b-instruct", + "parasail-qwen3-omni-30b-a3b-thinking", + "parasail-qwen3-omni-30b-a3b-instruct", + "parasail-dots-ocr", + "parasail-auto-glm-9b-multilingual" + ], "isAbortable": true, "isMultipartSupported": true, - "moderationRequired": true, - "name": "OpenAI", - "owners": ["{}"], - "slug": "openai", - "statusPageUrl": "https://status.openai.com/" + "moderationRequired": false, + "name": "Parasail", + "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], + "slug": "parasail", + "statusPageUrl": null }, - "provider_model_id": "text-embedding-ada-002", - "provider_name": "OpenAI", + "provider_model_id": "parasail-deepseek-v32", + "provider_name": "Parasail", "provider_region": null, - "provider_slug": "openai", - "quantization": "unknown", + "provider_slug": "parasail/fp8", + "quantization": "fp8", "supported_parameters": [ - "seed", + "reasoning", + "include_reasoning", "max_tokens", - "response_format", - "structured_outputs", "temperature", "top_p", - "stop", "frequency_penalty", "presence_penalty", + "repetition_penalty", + "seed", + "stop", + "top_k", "logit_bias", - "logprobs", - "top_logprobs" + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" @@ -124945,81 +123704,49 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Other", - "has_text_output": false, - "hf_slug": null, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3.2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: Text Embedding Ada 002", - "output_modalities": ["embeddings"], - "permaslug": "openai/text-embedding-ada-002", + "name": "DeepSeek: DeepSeek V3.2", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-v3.2-20251201", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Text Embedding Ada 002", - "slug": "openai/text-embedding-ada-002", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "DeepSeek V3.2", + "slug": "deepseek/deepseek-v3.2", + "updated_at": "2025-12-01T14:46:05.824401+00:00", "warning_message": null - } - ], - "name": "OpenAI", - "slug": "openai" - }, - { - "dataPolicy": { - "canPublish": true, - "retainsPrompts": true, - "training": true - }, - "displayName": "OpenInference", - "headquarters": "US", - "icon": { - "className": "invert dark:invert-0", - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://openinference.xyz/&size=256" - }, - "models": [], - "name": "OpenInference", - "slug": "open-inference" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Parasail", - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" - }, - "models": [ + }, { - "author": "allenai", - "context_length": 65536, - "created_at": "2025-11-21T20:51:16.304522+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-12-01T13:13:57.971996+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, + "temperature": 1, "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and highly nuanced conversational reasoning. Developed by Ai2 under the Apache 2.0 license, Olmo 3 32B Think embodies the Olmo initiative’s commitment to openness, offering full transparency across weights, code and training methodology.", + "description": "DeepSeek-V3.2-Speciale is a high-compute variant of DeepSeek-V3.2 optimized for maximum reasoning and agentic performance. It builds on DeepSeek Sparse Attention (DSA) for efficient long-context processing, then scales post-training reinforcement learning to push capability beyond the base model. Reported evaluations place Speciale ahead of GPT-5 on difficult reasoning workloads, with proficiency comparable to Gemini-3.0-Pro, while retaining strong coding and tool-use reliability. Like V3.2, it benefits from a large-scale agentic task synthesis pipeline that improves compliance and generalization in interactive environments.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 65536, + "context_length": 163840, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -125028,9 +123755,7 @@ "training": false }, "features": { - "disable_free_endpoint_limits": true, "is_mandatory_reasoning": true, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -125040,30 +123765,30 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "7282df05-53d5-4f61-9104-05a2ea109ffa", + "id": "f24a9a98-87af-4e35-8744-5fbb344f12d7", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 50, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 163840, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "allenai", - "context_length": 65536, - "created_at": "2025-11-21T20:51:16.304522+00:00", + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-12-01T13:13:57.971996+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, + "temperature": 1, "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and highly nuanced conversational reasoning. Developed by Ai2 under the Apache 2.0 license, Olmo 3 32B Think embodies the Olmo initiative’s commitment to openness, offering full transparency across weights, code and training methodology.", + "description": "DeepSeek-V3.2-Speciale is a high-compute variant of DeepSeek-V3.2 optimized for maximum reasoning and agentic performance. It builds on DeepSeek Sparse Attention (DSA) for efficient long-context processing, then scales post-training reinforcement learning to push capability beyond the base model. Reported evaluations place Speciale ahead of GPT-5 on difficult reasoning workloads, with proficiency comparable to Gemini-3.0-Pro, while retaining strong coding and tool-use reliability. Like V3.2, it benefits from a large-scale agentic task synthesis pipeline that improves compliance and generalization in interactive environments.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -125072,41 +123797,36 @@ "system_prompt": null } }, - "group": "Other", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "allenai/Olmo-3-32B-Think", + "hf_slug": "deepseek-ai/DeepSeek-V3.2-Speciale", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "AllenAI: Olmo 3 32B Think", + "name": "DeepSeek: DeepSeek V3.2 Speciale", "output_modalities": ["text"], - "permaslug": "allenai/olmo-3-32b-think-20251121", + "permaslug": "deepseek/deepseek-v3.2-speciale-20251201", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Olmo 3 32B Think", - "slug": "allenai/olmo-3-32b-think", + "short_name": "DeepSeek V3.2 Speciale", + "slug": "deepseek/deepseek-v3.2-speciale", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "allenai/olmo-3-32b-think-20251121", - "model_variant_slug": "allenai/olmo-3-32b-think", + "model_variant_permaslug": "deepseek/deepseek-v3.2-speciale-20251201", + "model_variant_slug": "deepseek/deepseek-v3.2-speciale", "moderation_required": false, - "name": "Parasail | allenai/olmo-3-32b-think-20251121", + "name": "Parasail | deepseek/deepseek-v3.2-speciale-20251201", "pricing": { - "completion": "0.0000005", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, "provider_display_name": "Parasail", "provider_info": { @@ -125143,11 +123863,11 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-olmo-3-32b-think", + "provider_model_id": "parasail-deepseek-v32-speciale", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/bf16", - "quantization": "bf16", + "provider_slug": "parasail/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", @@ -125155,15 +123875,14 @@ "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", "logit_bias", - "structured_outputs", - "response_format" + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -125179,44 +123898,44 @@ "system_prompt": null } }, - "group": "Other", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "allenai/Olmo-3-32B-Think", + "hf_slug": "deepseek-ai/DeepSeek-V3.2-Speciale", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "AllenAI: Olmo 3 32B Think", + "name": "DeepSeek: DeepSeek V3.2 Speciale", "output_modalities": ["text"], - "permaslug": "allenai/olmo-3-32b-think-20251121", + "permaslug": "deepseek/deepseek-v3.2-speciale-20251201", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Olmo 3 32B Think", - "slug": "allenai/olmo-3-32b-think", + "short_name": "DeepSeek V3.2 Speciale", + "slug": "deepseek/deepseek-v3.2-speciale", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "allenai", - "context_length": 65536, - "created_at": "2025-11-21T20:51:13.585428+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-12T05:12:39.645813+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["", "", ""], "default_system": null, - "description": "Olmo 3 7B Instruct is a supervised instruction-fine-tuned variant of the Olmo 3 7B base model, optimized for instruction-following, question-answering, and natural conversational dialogue. By leveraging high-quality instruction data and an open training pipeline, it delivers strong performance across everyday NLP tasks while remaining accessible and easy to integrate. Developed by Ai2 under the Apache 2.0 license, the model offers a transparent, community-friendly option for instruction-driven applications.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 65536, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -125225,7 +123944,7 @@ "training": false }, "features": { - "supports_input_audio": false, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -125235,7 +123954,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3dfd1d32-ce94-4ed2-bfd8-0de2e0371646", + "id": "2f608ade-87b1-46ed-8ae7-2714dddf2abb", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -125244,21 +123963,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "allenai", - "context_length": 65536, - "created_at": "2025-11-21T20:51:13.585428+00:00", + "author": "google", + "context_length": 131072, + "created_at": "2025-03-12T05:12:39.645813+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 + "temperature": null, + "top_p": null }, - "default_stops": [], + "default_stops": ["", "", ""], "default_system": null, - "description": "Olmo 3 7B Instruct is a supervised instruction-fine-tuned variant of the Olmo 3 7B base model, optimized for instruction-following, question-answering, and natural conversational dialogue. By leveraging high-quality instruction data and an open training pipeline, it delivers strong performance across everyday NLP tasks while remaining accessible and easy to integrate. Developed by Ai2 under the Apache 2.0 license, the model offers a transparent, community-friendly option for instruction-driven applications.", + "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", "features": { "chat_template_config": {}, "reasoning_config": { @@ -125267,41 +123986,36 @@ "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "allenai/Olmo-3-7B-Instruct", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "AllenAI: Olmo 3 7B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "allenai/olmo-3-7b-instruct-20251121", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Olmo 3 7B Instruct", - "slug": "allenai/olmo-3-7b-instruct", - "updated_at": "2025-11-22T00:00:24.418612+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, - "model_variant_permaslug": "allenai/olmo-3-7b-instruct-20251121", - "model_variant_slug": "allenai/olmo-3-7b-instruct", + "model_variant_permaslug": "google/gemma-3-27b-it", + "model_variant_slug": "google/gemma-3-27b-it", "moderation_required": false, - "name": "Parasail | allenai/olmo-3-7b-instruct-20251121", + "name": "Parasail | google/gemma-3-27b-it", "pricing": { - "completion": "0.0000002", + "completion": "0.00000045", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000008" }, "provider_display_name": "Parasail", "provider_info": { @@ -125338,31 +124052,28 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-olmo-3-7b-instruct", + "provider_model_id": "parasail-gemma3-27b-it", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/bf16", - "quantization": "bf16", + "provider_slug": "parasail/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", "logit_bias", - "tools", - "tool_choice", - "structured_outputs", - "response_format" + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -125374,44 +124085,40 @@ "system_prompt": null } }, - "group": "Other", + "group": "Gemini", "has_text_output": true, - "hf_slug": "allenai/Olmo-3-7B-Instruct", + "hf_slug": "google/gemma-3-27b-it", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "AllenAI: Olmo 3 7B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": "gemma", + "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", + "name": "Google: Gemma 3 27B", "output_modalities": ["text"], - "permaslug": "allenai/olmo-3-7b-instruct-20251121", + "permaslug": "google/gemma-3-27b-it", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Olmo 3 7B Instruct", - "slug": "allenai/olmo-3-7b-instruct", - "updated_at": "2025-11-22T00:00:24.418612+00:00", + "short_name": "Gemma 3 27B", + "slug": "google/gemma-3-27b-it", + "updated_at": "2026-01-07T04:36:03.22387+00:00", "warning_message": null }, { - "author": "allenai", - "context_length": 65536, - "created_at": "2025-11-21T20:51:10.334947+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Olmo 3 7B Think is a research-oriented language model in the Olmo family designed for advanced reasoning and instruction-driven tasks. It excels at multi-step problem solving, logical inference, and maintaining coherent conversational context. Developed by Ai2 under the Apache 2.0 license, Olmo 3 7B Think supports transparent, fully open experimentation and provides a lightweight yet capable foundation for academic research and practical NLP workflows.", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 65536, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -125420,8 +124127,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -125431,7 +124136,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "62189f7c-259a-4dc3-8e85-7769034f981c", + "id": "d0567a96-c81f-4106-bc94-eb36da460587", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -125440,64 +124145,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "allenai", - "context_length": 65536, - "created_at": "2025-11-21T20:51:10.334947+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Olmo 3 7B Think is a research-oriented language model in the Olmo family designed for advanced reasoning and instruction-driven tasks. It excels at multi-step problem solving, logical inference, and maintaining coherent conversational context. Developed by Ai2 under the Apache 2.0 license, Olmo 3 7B Think supports transparent, fully open experimentation and provides a lightweight yet capable foundation for academic research and practical NLP workflows.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "allenai/Olmo-3-7B-Think", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "AllenAI: Olmo 3 7B Think", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "allenai/olmo-3-7b-think-20251121", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Olmo 3 7B Think", - "slug": "allenai/olmo-3-7b-think", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "allenai/olmo-3-7b-think-20251121", - "model_variant_slug": "allenai/olmo-3-7b-think", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "Parasail | allenai/olmo-3-7b-think-20251121", + "name": "Parasail | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.0000002", + "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000012", - "request": "0", - "web_search": "0" + "prompt": "0.00000022" }, "provider_display_name": "Parasail", "provider_info": { @@ -125534,19 +124219,16 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-olmo-3-7b-think", + "provider_model_id": "parasail-llama-33-70b-fp8", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/bf16", - "quantization": "bf16", + "provider_slug": "parasail/int8", + "quantization": "int8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", @@ -125557,57 +124239,42 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "allenai/Olmo-3-7B-Think", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "AllenAI: Olmo 3 7B Think", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "allenai/olmo-3-7b-think-20251121", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Olmo 3 7B Think", - "slug": "allenai/olmo-3-7b-think", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "allenai", - "context_length": 65536, - "created_at": "2025-12-16T17:55:19+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 - }, + "author": "meta-llama", + "context_length": 524288, + "created_at": "2025-04-05T19:37:02.129674+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Olmo 3.1 32B Think is a large-scale, 32-billion-parameter model designed for deep reasoning, complex multi-step logic, and advanced instruction following. Building on the Olmo 3 series, version 3.1 delivers refined reasoning behavior and stronger performance across demanding evaluations and nuanced conversational tasks. Developed by Ai2 under the Apache 2.0 license, Olmo 3.1 32B Think continues the Olmo initiative’s commitment to openness, providing full transparency across model weights, code, and training methodology.", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 65536, + "context_length": 524288, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -125616,8 +124283,10 @@ "training": false }, "features": { - "disable_free_endpoint_limits": true, - "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -125627,73 +124296,53 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "73409d73-26ee-4ef9-9055-aee54589583f", + "id": "48b01f86-ff55-464f-ac51-01f97ebf88a1", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 50, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": 32768, "max_prompt_tokens": null, - "max_tokens_per_image": null, + "max_tokens_per_image": 3342, "model": { - "author": "allenai", - "context_length": 65536, - "created_at": "2025-12-16T17:55:19+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": 0.95 - }, + "author": "meta-llama", + "context_length": 1048576, + "created_at": "2025-04-05T19:37:02.129674+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Olmo 3.1 32B Think is a large-scale, 32-billion-parameter model designed for deep reasoning, complex multi-step logic, and advanced instruction following. Building on the Olmo 3 series, version 3.1 delivers refined reasoning behavior and stronger performance across demanding evaluations and nuanced conversational tasks. Developed by Ai2 under the Apache 2.0 license, Olmo 3.1 32B Think continues the Olmo initiative’s commitment to openness, providing full transparency across model weights, code, and training methodology.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": "allenai/Olmo-3.1-32B-Think", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "AllenAI: Olmo 3.1 32B Think", + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "allenai/olmo-3.1-32b-think-20251215", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Olmo 3.1 32B Think", - "slug": "allenai/olmo-3.1-32b-think", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "allenai/olmo-3.1-32b-think-20251215", - "model_variant_slug": "allenai/olmo-3.1-32b-think", + "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "model_variant_slug": "meta-llama/llama-4-maverick", "moderation_required": false, - "name": "Parasail | allenai/olmo-3.1-32b-think-20251215", + "name": "Parasail | meta-llama/llama-4-maverick-17b-128e-instruct", "pricing": { - "completion": "0.0000005", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000035" }, "provider_display_name": "Parasail", "provider_info": { @@ -125730,76 +124379,64 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-olmo-31-32b-think", + "provider_model_id": "parasail-llama-4-maverick-instruct-fp8", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/bf16", - "quantization": "bf16", + "provider_slug": "parasail/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", - "logit_bias", - "structured_outputs", - "response_format" + "logit_bias" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama4", "has_text_output": true, - "hf_slug": "allenai/Olmo-3.1-32B-Think", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "AllenAI: Olmo 3.1 32B Think", + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "allenai/olmo-3.1-32b-think-20251215", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "reasoning_config": null, "router": null, - "short_name": "Olmo 3.1 32B Think", - "slug": "allenai/olmo-3.1-32b-think", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "bytedance", - "context_length": 128000, - "created_at": "2025-07-22T17:24:16.94785+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 131072, + "created_at": "2025-06-20T18:10:16.960494+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement learning-based reasoning, enabling robust action planning and execution across virtual interfaces.\n\nThis model achieves state-of-the-art results on a range of interactive and grounding benchmarks, including OSworld, WebVoyager, AndroidWorld, and ScreenSpot. It also demonstrates perfect task completion across diverse Poki games and outperforms prior models in Minecraft agent tasks. UI-TARS-1.5 supports thought decomposition during inference and shows strong scaling across variants, with the 1.5 version notably exceeding the performance of earlier 72B and 7B checkpoints.", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -125808,7 +124445,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -125818,7 +124454,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "22a474a1-cb4e-42d2-b19a-c1be103f5abd", + "id": "fa50f0f7-ddc9-4155-bd92-d20c92dec640", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -125827,59 +124463,46 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": 15000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "bytedance", + "author": "mistralai", "context_length": 128000, - "created_at": "2025-07-22T17:24:16.94785+00:00", - "default_parameters": {}, + "created_at": "2025-06-20T18:10:16.960494+00:00", + "default_parameters": { + "temperature": 0.3 + }, "default_stops": [], "default_system": null, - "description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement learning-based reasoning, enabling robust action planning and execution across virtual interfaces.\n\nThis model achieves state-of-the-art results on a range of interactive and grounding benchmarks, including OSworld, WebVoyager, AndroidWorld, and ScreenSpot. It also demonstrates perfect task completion across diverse Poki games and outperforms prior models in Minecraft agent tasks. UI-TARS-1.5 supports thought decomposition during inference and shows strong scaling across variants, with the 1.5 version notably exceeding the performance of earlier 72B and 7B checkpoints.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "ByteDance-Seed/UI-TARS-1.5-7B", + "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "hf_updated_at": null, "hidden": false, "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "ByteDance: UI-TARS 7B ", + "name": "Mistral: Mistral Small 3.2 24B", "output_modalities": ["text"], - "permaslug": "bytedance/ui-tars-1.5-7b", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "reasoning_config": null, "router": null, - "short_name": "UI-TARS 7B ", - "slug": "bytedance/ui-tars-1.5-7b", + "short_name": "Mistral Small 3.2 24B", + "slug": "mistralai/mistral-small-3.2-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "bytedance/ui-tars-1.5-7b", - "model_variant_slug": "bytedance/ui-tars-1.5-7b", + "model_variant_permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "model_variant_slug": "mistralai/mistral-small-3.2-24b-instruct", "moderation_required": false, - "name": "Parasail | bytedance/ui-tars-1.5-7b", + "name": "Parasail | mistralai/mistral-small-3.2-24b-instruct-2506", "pricing": { - "completion": "0.0000002", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000009" }, "provider_display_name": "Parasail", "provider_info": { @@ -125916,7 +124539,7 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-ui-tars-1p5-7b", + "provider_model_id": "parasail-mistral-small-32-24b", "provider_name": "Parasail", "provider_region": null, "provider_slug": "parasail/bf16", @@ -125926,13 +124549,14 @@ "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", - "logit_bias" + "logit_bias", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, @@ -125940,51 +124564,37 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Mistral", "has_text_output": true, - "hf_slug": "ByteDance-Seed/UI-TARS-1.5-7B", + "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", "hf_updated_at": null, "hidden": false, "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "ByteDance: UI-TARS 7B ", + "name": "Mistral: Mistral Small 3.2 24B", "output_modalities": ["text"], - "permaslug": "bytedance/ui-tars-1.5-7b", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", + "reasoning_config": null, "router": null, - "short_name": "UI-TARS 7B ", - "slug": "bytedance/ui-tars-1.5-7b", + "short_name": "Mistral Small 3.2 24B", + "slug": "mistralai/mistral-small-3.2-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-12-01T13:10:42.818885+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -125993,17 +124603,20 @@ "training": false }, "features": { - "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, - "literal_none": false, + "literal_none": true, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "4c18a672-9069-469d-9e6b-8af2300fe7b7", + "id": "e258c373-8af0-4cd8-b314-5fe727ac0d00", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -126012,64 +124625,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 163840, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-12-01T13:10:42.818885+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism that reduces training and inference cost while preserving quality in long-context scenarios. A scalable reinforcement learning post-training framework further improves reasoning, with reported performance in the GPT-5 class, and the model has demonstrated gold-medal results on the 2025 IMO and IOI. V3.2 also uses a large-scale agentic task synthesis pipeline to better integrate reasoning into tool-use settings, boosting compliance and generalization in interactive environments.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.2-20251201", - "model_variant_slug": "deepseek/deepseek-v3.2", + "model_variant_permaslug": "moonshotai/kimi-k2-0905", + "model_variant_slug": "moonshotai/kimi-k2-0905", "moderation_required": false, - "name": "Parasail | deepseek/deepseek-v3.2-20251201", + "name": "Parasail | moonshotai/kimi-k2-0905", "pricing": { - "completion": "0.00000045", + "completion": "0.00000299", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000028", - "request": "0", - "web_search": "0" + "prompt": "0.00000059" }, "provider_display_name": "Parasail", "provider_info": { @@ -126106,79 +124709,78 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-deepseek-v32", + "provider_model_id": "parasail-kimi-k2-instruct", "provider_name": "Parasail", "provider_region": null, "provider_slug": "parasail/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", "logit_bias", - "response_format" + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-20251201", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2", - "slug": "deepseek/deepseek-v3.2", - "updated_at": "2025-12-01T14:46:05.824401+00:00", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-12-01T13:13:57.971996+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2-Speciale is a high-compute variant of DeepSeek-V3.2 optimized for maximum reasoning and agentic performance. It builds on DeepSeek Sparse Attention (DSA) for efficient long-context processing, then scales post-training reinforcement learning to push capability beyond the base model. Reported evaluations place Speciale ahead of GPT-5 on difficult reasoning workloads, with proficiency comparable to Gemini-3.0-Pro, while retaining strong coding and tool-use reliability. Like V3.2, it benefits from a large-scale agentic task synthesis pipeline that improves compliance and generalization in interactive environments.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -126188,6 +124790,7 @@ }, "features": { "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -126197,7 +124800,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f24a9a98-87af-4e35-8744-5fbb344f12d7", + "id": "177ccdbb-550b-4166-ab57-a015dbff428e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -126206,21 +124809,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 163840, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-12-01T13:13:57.971996+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 1, - "top_p": 0.95 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.2-Speciale is a high-compute variant of DeepSeek-V3.2 optimized for maximum reasoning and agentic performance. It builds on DeepSeek Sparse Attention (DSA) for efficient long-context processing, then scales post-training reinforcement learning to push capability beyond the base model. Reported evaluations place Speciale ahead of GPT-5 on difficult reasoning workloads, with proficiency comparable to Gemini-3.0-Pro, while retaining strong coding and tool-use reliability. Like V3.2, it benefits from a large-scale agentic task synthesis pipeline that improves compliance and generalization in interactive environments.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -126229,41 +124832,36 @@ "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2-Speciale", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2 Speciale", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-speciale-20251201", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2 Speciale", - "slug": "deepseek/deepseek-v3.2-speciale", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-v3.2-speciale-20251201", - "model_variant_slug": "deepseek/deepseek-v3.2-speciale", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "Parasail | deepseek/deepseek-v3.2-speciale-20251201", + "name": "Parasail | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.0000005", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, "provider_display_name": "Parasail", "provider_info": { @@ -126300,11 +124898,11 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-deepseek-v32-speciale", + "provider_model_id": "parasail-kimi-k2-thinking", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/fp8", - "quantization": "fp8", + "provider_slug": "parasail/int4", + "quantization": "int4", "supported_parameters": [ "reasoning", "include_reasoning", @@ -126312,13 +124910,16 @@ "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", - "logit_bias" + "logit_bias", + "logprobs", + "top_logprobs", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, @@ -126334,44 +124935,44 @@ "system_prompt": null } }, - "group": "DeepSeek", + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.2-Speciale", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.2 Speciale", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-v3.2-speciale-20251201", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.2 Speciale", - "slug": "deepseek/deepseek-v3.2-speciale", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-12T05:12:39.645813+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["", "", ""], + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -126380,7 +124981,7 @@ "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -126390,7 +124991,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "2f608ade-87b1-46ed-8ae7-2714dddf2abb", + "id": "ef5d8dac-e71b-4ba0-bc09-f01fabf23d3d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -126399,64 +125000,57 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 131072, - "created_at": "2025-03-12T05:12:39.645813+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": ["", "", ""], + "default_stops": [], "default_system": null, - "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling. Gemma 3 27B is Google's latest open source model, successor to [Gemma 2](google/gemma-2-27b-it)", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "features": { "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, - "group": "Gemini", + "group": "Other", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3-27b-it", - "model_variant_slug": "google/gemma-3-27b-it", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "Parasail | google/gemma-3-27b-it", + "name": "Parasail | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.00000045", + "completion": "0.0000028", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000008", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Parasail", "provider_info": { @@ -126493,27 +125087,32 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-gemma3-27b-it", + "provider_model_id": "parasail-kimi-k25", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/fp8", - "quantization": "fp8", + "provider_slug": "parasail/int4", + "quantization": "int4", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", - "logit_bias" + "logit_bias", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, @@ -126521,40 +125120,42 @@ "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, - "group": "Gemini", + "group": "Other", "has_text_output": true, - "hf_slug": "google/gemma-3-27b-it", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], - "instruct_type": "gemma", - "model_version_group_id": "c99b277a-cfaf-4e93-9360-8a79cfa2b2c4", - "name": "Google: Gemma 3 27B", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "google/gemma-3-27b-it", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "Gemma 3 27B", - "slug": "google/gemma-3-27b-it", - "updated_at": "2026-01-07T04:36:03.22387+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { - "author": "meta-llama", + "author": "openai", "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "created_at": "2025-08-05T17:17:11+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, @@ -126567,6 +125168,10 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -126576,7 +125181,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "d0567a96-c81f-4106-bc94-eb36da460587", + "id": "cedca069-169c-4bc2-963a-7ad3a21380c8", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -126589,45 +125194,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", + "author": "openai", "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "created_at": "2025-08-05T17:17:11+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", - "features": {}, - "group": "Llama3", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-120b", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "openai/gpt-oss-120b", + "model_variant_slug": "openai/gpt-oss-120b", "moderation_required": false, - "name": "Parasail | meta-llama/llama-3.3-70b-instruct", + "name": "Parasail | openai/gpt-oss-120b", "pricing": { - "completion": "0.0000005", + "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000022", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Parasail", "provider_info": { @@ -126664,61 +125279,79 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-llama-33-70b-fp8", + "provider_model_id": "parasail-gpt-oss-120b", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/int8", - "quantization": "int8", + "provider_slug": "parasail/fp4", + "quantization": "fp4", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", - "logit_bias" + "logit_bias", + "response_format" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-120b", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 524288, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", - "endpoint": { + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 524288, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -126727,10 +125360,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -126740,7 +125369,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "48b01f86-ff55-464f-ac51-01f97ebf88a1", + "id": "8febad8c-bc14-42f6-9b78-67e46f6c0cfb", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -126749,49 +125378,61 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": 131072, "max_prompt_tokens": null, - "max_tokens_per_image": 3342, + "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", - "features": {}, - "group": "Llama4", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-20b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "model_variant_slug": "meta-llama/llama-4-maverick", + "model_variant_permaslug": "openai/gpt-oss-20b", + "model_variant_slug": "openai/gpt-oss-20b", "moderation_required": false, - "name": "Parasail | meta-llama/llama-4-maverick-17b-128e-instruct", + "name": "Parasail | openai/gpt-oss-20b", "pricing": { - "completion": "0.000001", + "completion": "0.0000002", "discount": 0, - "image": "0.00070182", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000035", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, "provider_display_name": "Parasail", "provider_info": { @@ -126828,63 +125469,77 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-llama-4-maverick-instruct-fp8", + "provider_model_id": "parasail-gpt-oss-20b", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/fp8", - "quantization": "fp8", + "provider_slug": "parasail/fp4", + "quantization": "fp4", "supported_parameters": [ - "structured_outputs", - "response_format", + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", "logit_bias", - "tools", - "tool_choice" + "structured_outputs", + "response_format" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama4", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-20b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "mistralai", + "author": "prime-intellect", "context_length": 131072, - "created_at": "2025-06-20T18:10:16.960494+00:00", + "created_at": "2025-11-27T03:02:14.49479+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": 0.6, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", + "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, @@ -126897,16 +125552,19 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "fa50f0f7-ddc9-4155-bd92-d20c92dec640", + "id": "826423d6-e76a-4260-a358-c6b449bd0c0e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -126915,51 +125573,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 15000, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 128000, - "created_at": "2025-06-20T18:10:16.960494+00:00", + "author": "prime-intellect", + "context_length": 131072, + "created_at": "2025-11-27T03:02:14.49479+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": 0.6, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on WildBench and Arena Hard, reduces infinite generations, and delivers gains in tool use and structured output tasks.\n\nIt supports image and text inputs with structured outputs, function/tool calling, and strong performance across coding (HumanEval+, MBPP), STEM (MMLU, MATH, GPQA), and vision benchmarks (ChartQA, DocVQA).", - "features": {}, - "group": "Mistral", + "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "hf_slug": "PrimeIntellect/INTELLECT-3-FP8", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.2 24B", + "name": "Prime Intellect: INTELLECT-3", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "reasoning_config": null, + "permaslug": "prime-intellect/intellect-3-20251126", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Small 3.2 24B", - "slug": "mistralai/mistral-small-3.2-24b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "INTELLECT-3", + "slug": "prime-intellect/intellect-3", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "model_variant_slug": "mistralai/mistral-small-3.2-24b-instruct", + "model_variant_permaslug": "prime-intellect/intellect-3-20251126", + "model_variant_slug": "prime-intellect/intellect-3", "moderation_required": false, - "name": "Parasail | mistralai/mistral-small-3.2-24b-instruct-2506", + "name": "Parasail | prime-intellect/intellect-3-20251126", "pricing": { - "completion": "0.0000006", + "completion": "0.0000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Parasail", "provider_info": { @@ -126996,61 +125662,77 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-mistral-small-32-24b", + "provider_model_id": "parasail-primeintellect3", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/bf16", - "quantization": "bf16", + "provider_slug": "parasail/fp8", + "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", - "logit_bias" + "logit_bias", + "tool_choice", + "tools", + "structured_outputs", + "response_format" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-3.2-24B-Instruct-2506", + "hf_slug": "PrimeIntellect/INTELLECT-3-FP8", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3.2 24B", + "name": "Prime Intellect: INTELLECT-3", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-3.2-24b-instruct-2506", - "reasoning_config": null, + "permaslug": "prime-intellect/intellect-3-20251126", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral Small 3.2 24B", - "slug": "mistralai/mistral-small-3.2-24b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "INTELLECT-3", + "slug": "prime-intellect/intellect-3", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "author": "qwen", + "context_length": 128000, + "created_at": "2025-02-01T11:45:11.997326+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -127059,10 +125741,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -127072,7 +125751,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "e258c373-8af0-4cd8-b314-5fe727ac0d00", + "id": "3f0b8e14-7b24-431d-a19e-f2e257967b0a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -127081,59 +125760,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", + "author": "qwen", + "context_length": 131072, + "created_at": "2025-02-01T11:45:11.997326+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen2.5 VL 72B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen2.5 VL 72B Instruct", + "slug": "qwen/qwen2.5-vl-72b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", "moderation_required": false, - "name": "Parasail | moonshotai/kimi-k2-0905", + "name": "Parasail | qwen/qwen2.5-vl-72b-instruct", "pricing": { - "completion": "0.00000299", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000059", - "request": "0", - "web_search": "0" + "prompt": "0.0000008" }, "provider_display_name": "Parasail", "provider_info": { @@ -127170,79 +125834,62 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-kimi-k2-instruct", + "provider_model_id": "parasail-qwen25-vl-72b-instruct", "provider_name": "Parasail", "provider_region": null, "provider_slug": "parasail/fp8", "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", "logit_bias", - "tools", - "tool_choice" + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen2.5 VL 72B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen2.5 VL 72B Instruct", + "slug": "qwen/qwen2.5-vl-72b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 131072, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -127251,8 +125898,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -127262,73 +125907,63 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "177ccdbb-550b-4166-ab57-a015dbff428e", + "id": "043eb28f-fc28-4b0d-9800-b249fcfcfbf9", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 18, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", + "author": "qwen", "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "Parasail | moonshotai/kimi-k2-thinking-20251106", + "name": "Parasail | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.0000025", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Parasail", "provider_info": { @@ -127365,82 +126000,78 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-kimi-k2-thinking", + "provider_model_id": "parasail-qwen3-235b-a22b-instruct-2507", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/int4", - "quantization": "int4", + "provider_slug": "parasail/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", "logit_bias", - "logprobs", - "top_logprobs", - "structured_outputs", - "response_format" + "tools", + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2026-02-04T00:15:01.820167+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment.\n\nThe model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -127449,10 +126080,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -127462,7 +126089,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cedca069-169c-4bc2-963a-7ad3a21380c8", + "id": "abacc5e9-91e6-40bf-93c6-9112334a4042", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -127471,64 +126098,57 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2026-02-04T00:15:01.820167+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment.\n\nThe model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "GPT", + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-Coder-Next", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen: Qwen3 Coder Next", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-coder-next-2025-02-03", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Coder Next", + "slug": "qwen/qwen3-coder-next", + "updated_at": "2026-02-04T00:27:00.409072+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "qwen/qwen3-coder-next-2025-02-03", + "model_variant_slug": "qwen/qwen3-coder-next", "moderation_required": false, - "name": "Parasail | openai/gpt-oss-120b", + "name": "Parasail | qwen/qwen3-coder-next-2025-02-03", "pricing": { - "completion": "0.00000075", + "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000012", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Parasail", "provider_info": { @@ -127565,79 +126185,73 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-gpt-oss-120b", + "provider_model_id": "parasail-qwen3-coder-next", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/fp4", - "quantization": "fp4", + "provider_slug": "parasail/bf16", + "quantization": "bf16", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", - "logit_bias" + "logit_bias", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, - "group": "GPT", + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-Coder-Next", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen: Qwen3 Coder Next", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-coder-next-2025-02-03", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Coder Next", + "slug": "qwen/qwen3-coder-next", + "updated_at": "2026-02-04T00:27:00.409072+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -127655,7 +126269,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "8febad8c-bc14-42f6-9b78-67e46f6c0cfb", + "id": "566f9428-0fa4-42dc-8d00-48965e87a9cc", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -127664,66 +126278,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", "moderation_required": false, - "name": "Parasail | openai/gpt-oss-20b", + "name": "Parasail | qwen/qwen3-next-80b-a3b-instruct-2509", "pricing": { - "completion": "0.0000002", + "completion": "0.0000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Parasail", "provider_info": { @@ -127760,78 +126362,74 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-gpt-oss-20b", + "provider_model_id": "parasail-qwen-3-next-80b-instruct", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/fp4", - "quantization": "fp4", + "provider_slug": "parasail/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", "logit_bias", + "response_format", "structured_outputs", - "response_format" + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "prime-intellect", + "author": "qwen", "context_length": 131072, - "created_at": "2025-11-27T03:02:14.49479+00:00", + "created_at": "2025-09-23T23:04:47+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, @@ -127844,9 +126442,10 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", - "supports_input_audio": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -127856,7 +126455,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "826423d6-e76a-4260-a358-c6b449bd0c0e", + "id": "9d2c36b8-0885-4ea2-9df4-954e856e4f7a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -127865,64 +126464,58 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "prime-intellect", + "author": "qwen", "context_length": 131072, - "created_at": "2025-11-27T03:02:14.49479+00:00", + "created_at": "2025-09-23T23:04:47+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math, code, science, and general reasoning, consistently outperforming many larger frontier models. Designed for strong multi-step problem solving, it maintains high accuracy on structured tasks while remaining efficient at inference thanks to its MoE architecture.", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "PrimeIntellect/INTELLECT-3-FP8", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Prime Intellect: INTELLECT-3", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", "output_modalities": ["text"], - "permaslug": "prime-intellect/intellect-3-20251126", + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "INTELLECT-3", - "slug": "prime-intellect/intellect-3", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "prime-intellect/intellect-3-20251126", - "model_variant_slug": "prime-intellect/intellect-3", + "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", + "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", "moderation_required": false, - "name": "Parasail | prime-intellect/intellect-3-20251126", + "name": "Parasail | qwen/qwen3-vl-235b-a22b-instruct", "pricing": { - "completion": "0.0000011", + "completion": "0.0000019", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000021" }, "provider_display_name": "Parasail", "provider_info": { @@ -127959,78 +126552,78 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-primeintellect3", + "provider_model_id": "parasail-qwen3-vl-235b-a22b-instruct", "provider_name": "Parasail", "provider_region": null, "provider_slug": "parasail/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", "logit_bias", - "tool_choice", "tools", - "structured_outputs", - "response_format" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "PrimeIntellect/INTELLECT-3-FP8", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Prime Intellect: INTELLECT-3", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", "output_modalities": ["text"], - "permaslug": "prime-intellect/intellect-3-20251126", + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "INTELLECT-3", - "slug": "prime-intellect/intellect-3", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 128000, - "created_at": "2025-02-01T11:45:11.997326+00:00", - "default_parameters": {}, + "context_length": 262144, + "created_at": "2025-10-14T17:35:08.402158+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.7, + "top_p": 0.8 + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -128039,17 +126632,16 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": true, - "id": "3f0b8e14-7b24-431d-a19e-f2e257967b0a", + "id": "91501e15-158f-48c3-967c-faf664e98284", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -128058,49 +126650,58 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2025-02-01T11:45:11.997326+00:00", - "default_parameters": {}, + "context_length": 256000, + "created_at": "2025-10-14T17:35:08.402158+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.7, + "top_p": 0.8 + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", - "features": {}, - "group": "Qwen", + "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "Qwen: Qwen3 VL 8B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-vl-8b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", + "short_name": "Qwen3 VL 8B Instruct", + "slug": "qwen/qwen3-vl-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", - "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_permaslug": "qwen/qwen3-vl-8b-instruct", + "model_variant_slug": "qwen/qwen3-vl-8b-instruct", "moderation_required": false, - "name": "Parasail | qwen/qwen2.5-vl-72b-instruct", + "name": "Parasail | qwen/qwen3-vl-8b-instruct", "pricing": { - "completion": "0.000001", + "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000008", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, "provider_display_name": "Parasail", "provider_info": { @@ -128137,57 +126738,74 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-qwen25-vl-72b-instruct", + "provider_model_id": "parasail-qwen3vl-8b-instruct", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/fp8", - "quantization": "fp8", + "provider_slug": "parasail/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", - "logit_bias" + "logit_bias", + "tools", + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Qwen", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["image", "text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "Qwen: Qwen3 VL 8B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-vl-8b-instruct", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", + "short_name": "Qwen3 VL 8B Instruct", + "slug": "qwen/qwen3-vl-8b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", + "author": "thedrummer", "context_length": 131072, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "created_at": "2025-09-27T00:11:18.116138+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, @@ -128209,26 +126827,30 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "043eb28f-fc28-4b0d-9800-b249fcfcfbf9", + "id": "4d46db86-63fe-47eb-85f1-6a7567af190c", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 18, + "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "author": "thedrummer", + "context_length": 131072, + "created_at": "2025-09-27T00:11:18.116138+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", "features": { "reasoning_config": { "end_token": null, @@ -128236,41 +126858,36 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "thedrummer/cydonia-24b-v4.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "TheDrummer: Cydonia 24B V4.1", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "thedrummer/cydonia-24b-v4.1", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Cydonia 24B V4.1", + "slug": "thedrummer/cydonia-24b-v4.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "thedrummer/cydonia-24b-v4.1", + "model_variant_slug": "thedrummer/cydonia-24b-v4.1", "moderation_required": false, - "name": "Parasail | qwen/qwen3-235b-a22b-07-25", + "name": "Parasail | thedrummer/cydonia-24b-v4.1", "pricing": { - "completion": "0.0000014", + "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000019", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "Parasail", "provider_info": { @@ -128307,29 +126924,26 @@ "slug": "parasail", "statusPageUrl": null }, - "provider_model_id": "parasail-qwen3-235b-a22b-instruct-2507", + "provider_model_id": "parasail-cydonia-24-v41", "provider_name": "Parasail", "provider_region": null, - "provider_slug": "parasail/fp8", - "quantization": "fp8", + "provider_slug": "parasail/bf16", + "quantization": "bf16", "supported_parameters": [ "max_tokens", "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", "stop", "top_k", - "logit_bias", - "tools", - "tool_choice" + "logit_bias" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -128340,40 +126954,40 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "thedrummer/cydonia-24b-v4.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "TheDrummer: Cydonia 24B V4.1", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "thedrummer/cydonia-24b-v4.1", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Cydonia 24B V4.1", + "slug": "thedrummer/cydonia-24b-v4.1", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", + "author": "thedrummer", + "context_length": 32768, + "created_at": "2025-03-10T19:56:06.00791+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.", "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -128391,7 +127005,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "566f9428-0fa4-42dc-8d00-48965e87a9cc", + "id": "1eb01ded-ae11-49e6-8aa6-3067584070bd", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -128400,781 +127014,27 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", + "author": "thedrummer", + "context_length": 32768, + "created_at": "2025-03-10T19:56:06.00791+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.", + "features": {}, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "hf_slug": "TheDrummer/Skyfall-36B-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", - "moderation_required": false, - "name": "Parasail | qwen/qwen3-next-80b-a3b-instruct-2509", - "pricing": { - "completion": "0.0000011", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "Parasail", - "provider_info": { - "adapterName": "ParasailAdapter", - "baseUrl": "https://api.parasail.io/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.parasail.io/legal/terms", - "training": false - }, - "displayName": "Parasail", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" - }, - "ignoredProviderModels": [ - "parasail-olmo-2-1124-7b-instruct", - "parasail-qwen3-omni-30b-a3b-thinking", - "parasail-qwen3-omni-30b-a3b-instruct", - "parasail-dots-ocr", - "parasail-auto-glm-9b-multilingual" - ], - "isAbortable": true, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Parasail", - "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], - "slug": "parasail", - "statusPageUrl": null - }, - "provider_model_id": "parasail-qwen-3-next-80b-instruct", - "provider_name": "Parasail", - "provider_region": null, - "provider_slug": "parasail/fp8", - "quantization": "fp8", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "frequency_penalty", - "min_p", - "presence_penalty", - "repetition_penalty", - "seed", - "stop", - "top_k", - "logit_bias" - ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:47+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, - "default_stops": [], - "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", - "endpoint": { - "adapter_name": "ParasailAdapter", - "can_abort": true, - "context_length": 131072, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.parasail.io/legal/terms", - "training": false - }, - "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": true, - "id": "9d2c36b8-0885-4ea2-9df4-954e856e4f7a", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 32768, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:47+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, - "default_stops": [], - "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", - "moderation_required": false, - "name": "Parasail | qwen/qwen3-vl-235b-a22b-instruct", - "pricing": { - "completion": "0.0000019", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000021", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "Parasail", - "provider_info": { - "adapterName": "ParasailAdapter", - "baseUrl": "https://api.parasail.io/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.parasail.io/legal/terms", - "training": false - }, - "displayName": "Parasail", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" - }, - "ignoredProviderModels": [ - "parasail-olmo-2-1124-7b-instruct", - "parasail-qwen3-omni-30b-a3b-thinking", - "parasail-qwen3-omni-30b-a3b-instruct", - "parasail-dots-ocr", - "parasail-auto-glm-9b-multilingual" - ], - "isAbortable": true, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Parasail", - "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], - "slug": "parasail", - "statusPageUrl": null - }, - "provider_model_id": "parasail-qwen3-vl-235b-a22b-instruct", - "provider_name": "Parasail", - "provider_region": null, - "provider_slug": "parasail/fp8", - "quantization": "fp8", - "supported_parameters": [ - "structured_outputs", - "response_format", - "max_tokens", - "temperature", - "top_p", - "frequency_penalty", - "min_p", - "presence_penalty", - "repetition_penalty", - "seed", - "stop", - "top_k", - "logit_bias", - "tools", - "tool_choice" - ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-14T17:35:08.402158+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, - "default_stops": [], - "default_system": null, - "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", - "endpoint": { - "adapter_name": "ParasailAdapter", - "can_abort": true, - "context_length": 262144, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.parasail.io/legal/terms", - "training": false - }, - "features": { - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": true, - "id": "91501e15-158f-48c3-967c-faf664e98284", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 262144, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "qwen", - "context_length": 256000, - "created_at": "2025-10-14T17:35:08.402158+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, - "default_stops": [], - "default_system": null, - "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 8B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-8b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Qwen3 VL 8B Instruct", - "slug": "qwen/qwen3-vl-8b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "qwen/qwen3-vl-8b-instruct", - "model_variant_slug": "qwen/qwen3-vl-8b-instruct", - "moderation_required": false, - "name": "Parasail | qwen/qwen3-vl-8b-instruct", - "pricing": { - "completion": "0.00000075", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "Parasail", - "provider_info": { - "adapterName": "ParasailAdapter", - "baseUrl": "https://api.parasail.io/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.parasail.io/legal/terms", - "training": false - }, - "displayName": "Parasail", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" - }, - "ignoredProviderModels": [ - "parasail-olmo-2-1124-7b-instruct", - "parasail-qwen3-omni-30b-a3b-thinking", - "parasail-qwen3-omni-30b-a3b-instruct", - "parasail-dots-ocr", - "parasail-auto-glm-9b-multilingual" - ], - "isAbortable": true, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Parasail", - "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], - "slug": "parasail", - "statusPageUrl": null - }, - "provider_model_id": "parasail-qwen3vl-8b-instruct", - "provider_name": "Parasail", - "provider_region": null, - "provider_slug": "parasail/bf16", - "quantization": "bf16", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "frequency_penalty", - "min_p", - "presence_penalty", - "repetition_penalty", - "seed", - "stop", - "top_k", - "logit_bias", - "structured_outputs", - "response_format" - ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 8B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-8b-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Qwen3 VL 8B Instruct", - "slug": "qwen/qwen3-vl-8b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "thedrummer", - "context_length": 131072, - "created_at": "2025-09-27T00:11:18.116138+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], - "default_system": null, - "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", - "endpoint": { - "adapter_name": "ParasailAdapter", - "can_abort": true, - "context_length": 131072, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.parasail.io/legal/terms", - "training": false - }, - "features": { - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": true, - "id": "4d46db86-63fe-47eb-85f1-6a7567af190c", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 131072, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "thedrummer", - "context_length": 131072, - "created_at": "2025-09-27T00:11:18.116138+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], - "default_system": null, - "description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": "thedrummer/cydonia-24b-v4.1", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "TheDrummer: Cydonia 24B V4.1", - "output_modalities": ["text"], - "permaslug": "thedrummer/cydonia-24b-v4.1", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Cydonia 24B V4.1", - "slug": "thedrummer/cydonia-24b-v4.1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "thedrummer/cydonia-24b-v4.1", - "model_variant_slug": "thedrummer/cydonia-24b-v4.1", - "moderation_required": false, - "name": "Parasail | thedrummer/cydonia-24b-v4.1", - "pricing": { - "completion": "0.0000005", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "Parasail", - "provider_info": { - "adapterName": "ParasailAdapter", - "baseUrl": "https://api.parasail.io/v1", - "byokEnabled": true, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.parasail.io/legal/terms", - "training": false - }, - "displayName": "Parasail", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.parasail.io/&size=256" - }, - "ignoredProviderModels": [ - "parasail-olmo-2-1124-7b-instruct", - "parasail-qwen3-omni-30b-a3b-thinking", - "parasail-qwen3-omni-30b-a3b-instruct", - "parasail-dots-ocr", - "parasail-auto-glm-9b-multilingual" - ], - "isAbortable": true, - "isMultipartSupported": true, - "moderationRequired": false, - "name": "Parasail", - "owners": ["org_34P5Ca01in28Ek1oxb5OtfZdEjQ", "user_37qaJKhqfUEFgVF46sarwDHxE50"], - "slug": "parasail", - "statusPageUrl": null - }, - "provider_model_id": "parasail-cydonia-24-v41", - "provider_name": "Parasail", - "provider_region": null, - "provider_slug": "parasail/bf16", - "quantization": "bf16", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "frequency_penalty", - "min_p", - "presence_penalty", - "repetition_penalty", - "seed", - "stop", - "top_k", - "logit_bias" - ], - "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": "thedrummer/cydonia-24b-v4.1", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "TheDrummer: Cydonia 24B V4.1", - "output_modalities": ["text"], - "permaslug": "thedrummer/cydonia-24b-v4.1", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Cydonia 24B V4.1", - "slug": "thedrummer/cydonia-24b-v4.1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "thedrummer", - "context_length": 32768, - "created_at": "2025-03-10T19:56:06.00791+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.", - "endpoint": { - "adapter_name": "ParasailAdapter", - "can_abort": true, - "context_length": 32768, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.parasail.io/legal/terms", - "training": false - }, - "features": { - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": true, - "id": "1eb01ded-ae11-49e6-8aa6-3067584070bd", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 32768, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "thedrummer", - "context_length": 32768, - "created_at": "2025-03-10T19:56:06.00791+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.", - "features": {}, - "group": "Other", - "has_text_output": true, - "hf_slug": "TheDrummer/Skyfall-36B-v2", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "TheDrummer: Skyfall 36B V2", + "name": "TheDrummer: Skyfall 36B V2", "output_modalities": ["text"], "permaslug": "thedrummer/skyfall-36b-v2", "reasoning_config": null, @@ -129191,12 +127051,7 @@ "pricing": { "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000055", - "request": "0", - "web_search": "0" + "prompt": "0.00000055" }, "provider_display_name": "Parasail", "provider_info": { @@ -129243,7 +127098,6 @@ "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", @@ -129300,6 +127154,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_base64_video_input": false, "supports_tool_choice": { "literal_auto": true, @@ -129372,12 +127227,7 @@ "pricing": { "completion": "0.0000009", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "Parasail", "provider_info": { @@ -129426,7 +127276,6 @@ "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", @@ -129476,7 +127325,7 @@ }, { "author": "z-ai", - "context_length": 131072, + "context_length": 202752, "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, @@ -129489,7 +127338,7 @@ "endpoint": { "adapter_name": "ParasailAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 202752, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.parasail.io/legal/privacy-policy", @@ -129517,7 +127366,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 202752, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -129569,12 +127418,7 @@ "pricing": { "completion": "0.0000021", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000045", - "request": "0", - "web_search": "0" + "prompt": "0.00000045" }, "provider_display_name": "Parasail", "provider_info": { @@ -129623,7 +127467,6 @@ "temperature", "top_p", "frequency_penalty", - "min_p", "presence_penalty", "repetition_penalty", "seed", @@ -129762,12 +127605,8 @@ "pricing": { "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", "prompt": "0.000001", - "request": "0.005", - "web_search": "0" + "web_search": "0.005" }, "provider_display_name": "Perplexity", "provider_info": { @@ -129936,11 +127775,8 @@ "pricing": { "completion": "0.000008", "discount": 0, - "image": "0", - "image_output": "0", "internal_reasoning": "0.000003", "prompt": "0.000002", - "request": "0", "web_search": "0.005" }, "provider_display_name": "Perplexity", @@ -130097,11 +127933,7 @@ "pricing": { "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", "prompt": "0.000003", - "request": "0", "web_search": "0.005" }, "provider_display_name": "Perplexity", @@ -130286,12 +128118,8 @@ "pricing": { "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", "prompt": "0.000003", - "request": "0.018", - "web_search": "0" + "web_search": "0.018" }, "provider_display_name": "Perplexity", "provider_info": { @@ -130474,11 +128302,7 @@ "pricing": { "completion": "0.000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", "prompt": "0.000002", - "request": "0", "web_search": "0.005" }, "provider_display_name": "Perplexity", @@ -130687,13 +128511,7 @@ "pricing": { "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, "provider_display_name": "Phala", "provider_info": { @@ -130881,12 +128699,7 @@ "pricing": { "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000011", - "request": "0", - "web_search": "0" + "prompt": "0.00000011" }, "provider_display_name": "Phala", "provider_info": { @@ -131066,12 +128879,7 @@ "pricing": { "completion": "0.00000049", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Phala", "provider_info": { @@ -131160,21 +128968,21 @@ "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "author": "qwen", + "context_length": 32768, + "created_at": "2024-10-16T00:00:00+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://red-pill.ai/privacy", @@ -131183,6 +128991,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -131191,8 +129000,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "4a818281-fd73-4ab0-9498-6d903bbfbe67", + "has_completions": false, + "id": "3a6186c9-0ece-4797-9f97-45d7fab8c546", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -131205,62 +129014,54 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", + "author": "qwen", "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "created_at": "2024-10-16T00:00:00+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen2.5-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen2.5 7B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen-2.5-7b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen2.5 7B Instruct", + "slug": "qwen/qwen-2.5-7b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "qwen/qwen-2.5-7b-instruct", + "model_variant_slug": "qwen/qwen-2.5-7b-instruct", "moderation_required": false, - "name": "Phala | openai/gpt-oss-20b", + "name": "Phala | qwen/qwen-2.5-7b-instruct", "pricing": { - "completion": "0.00000015", + "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, "provider_display_name": "Phala", "provider_info": { @@ -131291,14 +129092,12 @@ "slug": "phala", "statusPageUrl": "https://status.phala.network/" }, - "provider_model_id": "phala/gpt-oss-20b", + "provider_model_id": "phala/qwen-2.5-7b-instruct", "provider_name": "Phala", "provider_region": null, "provider_slug": "phala", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -131308,64 +129107,59 @@ "seed", "top_k", "min_p", - "repetition_penalty", - "tools", - "tool_choice" + "repetition_penalty" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen2.5-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen2.5 7B Instruct", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen-2.5-7b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen2.5 7B Instruct", + "slug": "qwen/qwen-2.5-7b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 32768, - "created_at": "2024-10-16T00:00:00+00:00", + "context_length": 128000, + "created_at": "2025-10-06T23:47:56.430294+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "endpoint": { "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://red-pill.ai/privacy", @@ -131374,17 +129168,16 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, + "literal_none": false, "literal_required": true, "type_function": true } }, "has_chat_completions": true, - "has_completions": false, - "id": "3a6186c9-0ece-4797-9f97-45d7fab8c546", + "has_completions": true, + "id": "da5edce8-a254-4324-aed5-ff0baa4efcee", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -131398,58 +129191,54 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2024-10-16T00:00:00+00:00", + "context_length": 262144, + "created_at": "2025-10-06T23:47:56.430294+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_stops": [], "default_system": null, - "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-7B-Instruct", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 7B Instruct", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-7b-instruct", + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen2.5 7B Instruct", - "slug": "qwen/qwen-2.5-7b-instruct", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2.5-7b-instruct", - "model_variant_slug": "qwen/qwen-2.5-7b-instruct", + "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "model_variant_slug": "qwen/qwen3-vl-30b-a3b-instruct", "moderation_required": false, - "name": "Phala | qwen/qwen-2.5-7b-instruct", + "name": "Phala | qwen/qwen3-vl-30b-a3b-instruct", "pricing": { - "completion": "0.0000001", + "completion": "0.0000007", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Phala", "provider_info": { @@ -131480,7 +129269,7 @@ "slug": "phala", "statusPageUrl": "https://status.phala.network/" }, - "provider_model_id": "phala/qwen-2.5-7b-instruct", + "provider_model_id": "phala/qwen3-vl-30b-a3b-instruct", "provider_name": "Phala", "provider_region": null, "provider_slug": "phala", @@ -131495,55 +129284,64 @@ "seed", "top_k", "min_p", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-7B-Instruct", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 7B Instruct", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-7b-instruct", + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen2.5 7B Instruct", - "slug": "qwen/qwen-2.5-7b-instruct", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 65536, - "created_at": "2025-02-01T11:45:11.997326+00:00", - "default_parameters": {}, + "author": "z-ai", + "context_length": 202752, + "created_at": "2026-01-19T14:45:13.352372+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "PhalaAdapter", "can_abort": true, - "context_length": 65536, + "context_length": 202752, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://red-pill.ai/privacy", @@ -131552,10 +129350,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": false, - "structured_outputs": false - }, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -131565,7 +129360,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cc87060b-0d48-4c13-aaf5-d452642af9f4", + "id": "d618ba32-51e8-4a80-9dfc-db286e69aeb1", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -131574,53 +129369,55 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 65536, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-02-01T11:45:11.997326+00:00", - "default_parameters": {}, + "author": "z-ai", + "context_length": 200000, + "created_at": "2026-01-19T14:45:13.352372+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", - "features": {}, - "group": "Qwen", + "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "zai-org/GLM-4.7-Flash", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "Z.AI: GLM 4.7 Flash", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-flash-20260119", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7 Flash", + "slug": "z-ai/glm-4.7-flash", + "updated_at": "2026-01-19T15:38:17.116015+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", - "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_permaslug": "z-ai/glm-4.7-flash-20260119", + "model_variant_slug": "z-ai/glm-4.7-flash", "moderation_required": false, - "name": "Phala | qwen/qwen2.5-vl-72b-instruct", + "name": "Phala | z-ai/glm-4.7-flash-20260119", "pricing": { - "completion": "0.00000059", + "completion": "0.00000043", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000059", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Phala", "provider_info": { - "adapterName": "OpenAIAdapter", + "adapterName": "PhalaAdapter", "baseUrl": "https://api.redpill.ai/v1", "byokEnabled": true, "dataPolicy": { @@ -131647,12 +129444,14 @@ "slug": "phala", "statusPageUrl": "https://status.phala.network/" }, - "provider_model_id": "phala/qwen2.5-vl-72b-instruct", + "provider_model_id": "phala/glm-4.7-flash", "provider_name": "Phala", "provider_region": null, "provider_slug": "phala", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -131662,31 +129461,38 @@ "seed", "top_k", "min_p", - "repetition_penalty" + "repetition_penalty", + "tools", + "tool_choice", + "structured_outputs", + "response_format" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Qwen", + "features": { + "chat_template_config": {}, + "reasoning_config": {} + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "zai-org/GLM-4.7-Flash", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "Z.AI: GLM 4.7 Flash", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", - "reasoning_config": null, + "permaslug": "z-ai/glm-4.7-flash-20260119", + "reasoning_config": {}, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7 Flash", + "slug": "z-ai/glm-4.7-flash", + "updated_at": "2026-01-19T15:38:17.116015+00:00", "warning_message": null } ], @@ -131799,12 +129605,7 @@ "pricing": { "completion": "0.00000125", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000085", - "request": "0", - "web_search": "0" + "prompt": "0.00000085" }, "provider_display_name": "Relace", "provider_info": { @@ -131916,7 +129717,7 @@ "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 100, "limit_rpm_cf": null, "max_completion_tokens": 128000, "max_prompt_tokens": null, @@ -131970,13 +129771,7 @@ "pricing": { "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "prompt": "0.000001" }, "provider_display_name": "Relace", "provider_info": { @@ -132148,12 +129943,7 @@ "pricing": { "completion": "0.0000045", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "prompt": "0.000003" }, "provider_display_name": "SambaNova", "provider_info": { @@ -132311,12 +130101,7 @@ "pricing": { "completion": "0.00000075", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "SambaNova High Throughput", "provider_info": { @@ -132499,12 +130284,7 @@ "pricing": { "completion": "0.0000045", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "prompt": "0.000003" }, "provider_display_name": "SambaNova", "provider_info": { @@ -132694,12 +130474,7 @@ "pricing": { "completion": "0.000007", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000005", - "request": "0", - "web_search": "0" + "prompt": "0.000005" }, "provider_display_name": "SambaNova", "provider_info": { @@ -132873,12 +130648,7 @@ "pricing": { "completion": "0.0000014", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000007", - "request": "0", - "web_search": "0" + "prompt": "0.0000007" }, "provider_display_name": "SambaNova", "provider_info": { @@ -133042,12 +130812,7 @@ "pricing": { "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "SambaNova", "provider_info": { @@ -133195,12 +130960,7 @@ "pricing": { "completion": "0.0000009", "discount": 0.25, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000045", - "request": "0", - "web_search": "0" + "prompt": "0.00000045" }, "provider_display_name": "SambaNova Turbo", "provider_info": { @@ -133348,12 +131108,7 @@ "pricing": { "completion": "0.0000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000063", - "request": "0", - "web_search": "0" + "prompt": "0.00000063" }, "provider_display_name": "SambaNova", "provider_info": { @@ -133520,12 +131275,7 @@ "pricing": { "completion": "0.00000095", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000014", - "request": "0", - "web_search": "0" + "prompt": "0.00000014" }, "provider_display_name": "SambaNova", "provider_info": { @@ -133702,12 +131452,7 @@ "pricing": { "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, "provider_display_name": "SambaNova", "provider_info": { @@ -133911,12 +131656,7 @@ "pricing": { "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, "provider_display_name": "Seed", "provider_info": { @@ -133938,7 +131678,7 @@ "icon": { "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://avatars.githubusercontent.com/u/4158466?v=4&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": ["ep-20251223020539-6m7wz"], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, @@ -133950,8 +131690,8 @@ "provider_model_id": "ep-20251022091355-w7xt5", "provider_name": "Seed", "provider_region": null, - "provider_slug": "seed", - "quantization": "unknown", + "provider_slug": "seed/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", @@ -133959,10 +131699,11 @@ "max_tokens", "temperature", "top_p", - "tool_choice", + "stop", "tools", - "structured_outputs", - "response_format" + "tool_choice", + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -133970,8 +131711,10 @@ "variable_pricings": [ { "completions": "0.000004", + "input_cache_read": "0", + "input_cache_write": "0", "prompt": "0.0000005", - "threshold": 131072, + "threshold": 128000, "type": "prompt-threshold" } ], @@ -134054,7 +131797,7 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { @@ -134106,12 +131849,7 @@ "pricing": { "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000075", - "request": "0", - "web_search": "0" + "prompt": "0.000000075" }, "provider_display_name": "Seed", "provider_info": { @@ -134133,7 +131871,7 @@ "icon": { "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://avatars.githubusercontent.com/u/4158466?v=4&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": ["ep-20251223020539-6m7wz"], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, @@ -134145,8 +131883,8 @@ "provider_model_id": "ep-20251022091553-b9gf5", "provider_name": "Seed", "provider_region": null, - "provider_slug": "seed", - "quantization": "unknown", + "provider_slug": "seed/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", @@ -134154,10 +131892,11 @@ "max_tokens", "temperature", "top_p", + "stop", "tools", "tool_choice", - "structured_outputs", - "response_format" + "response_format", + "structured_outputs" ], "supports_multipart": true, "supports_reasoning": true, @@ -134165,8 +131904,10 @@ "variable_pricings": [ { "completions": "0.0000008", + "input_cache_read": "0", + "input_cache_write": "0", "prompt": "0.0000001", - "threshold": 131072, + "threshold": 128000, "type": "prompt-threshold" } ], @@ -134289,7 +132030,7 @@ "router": null, "short_name": "Seedream 4.5", "slug": "bytedance-seed/seedream-4.5", - "updated_at": "2025-12-23T20:23:30.403+00:00", + "updated_at": "2026-02-07T03:52:42.117798+00:00", "warning_message": null }, "model_variant_permaslug": "bytedance-seed/seedream-4.5-20251203", @@ -134297,14 +132038,10 @@ "moderation_required": false, "name": "Seed | bytedance-seed/seedream-4.5-20251203", "pricing": { - "completion": "0.000009581", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0.000009581", - "internal_reasoning": "0", - "prompt": "0", - "request": "0", - "web_search": "0" + "image_output": "0.00000958083832335329", + "prompt": "0" }, "provider_display_name": "Seed", "provider_info": { @@ -134326,7 +132063,7 @@ "icon": { "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://avatars.githubusercontent.com/u/4158466?v=4&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": ["ep-20251223020539-6m7wz"], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, @@ -134376,7 +132113,7 @@ "router": null, "short_name": "Seedream 4.5", "slug": "bytedance-seed/seedream-4.5", - "updated_at": "2025-12-23T20:23:30.403+00:00", + "updated_at": "2026-02-07T03:52:42.117798+00:00", "warning_message": null } ], @@ -134485,12 +132222,7 @@ "pricing": { "completion": "0.0000011", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000028", - "request": "0", - "web_search": "0" + "prompt": "0.00000028" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -134652,12 +132384,7 @@ "pricing": { "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -134821,12 +132548,7 @@ "pricing": { "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -135010,12 +132732,7 @@ "pricing": { "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -135197,13 +132914,7 @@ "pricing": { "completion": "0.00000042", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -135390,12 +133101,7 @@ "pricing": { "completion": "0.00000041", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -135582,12 +133288,7 @@ "pricing": { "completion": "0.00000218", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -135754,12 +133455,7 @@ "pricing": { "completion": "0.00000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000006" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -135834,16 +133530,20 @@ }, { "author": "minimax", - "context_length": 131072, - "created_at": "2025-06-17T22:46:54.257159+00:00", - "default_parameters": {}, + "context_length": 196608, + "created_at": "2025-12-23T01:56:37+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.9 + }, "default_stops": [], - "default_system": null, - "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 196608, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -135852,7 +133552,7 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -135862,7 +133562,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "e7356fe8-ee56-433f-b4d2-1e8eaa51da58", + "id": "9dce593a-4561-4c68-bf16-cd02ffbdd740", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -135876,52 +133576,56 @@ "max_tokens_per_image": null, "model": { "author": "minimax", - "context_length": 1000000, - "created_at": "2025-06-17T22:46:54.257159+00:00", - "default_parameters": {}, + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.9 + }, "default_stops": [], - "default_system": null, - "description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it to process long sequences—up to 1 million tokens—while maintaining competitive FLOP efficiency. With 456 billion total parameters and 45.9B active per token, this variant is optimized for complex, multi-step reasoning tasks.\n\nTrained via a custom reinforcement learning pipeline (CISPO), M1 excels in long-context understanding, software engineering, agentic tool use, and mathematical reasoning. Benchmarks show strong performance across FullStackBench, SWE-bench, MATH, GPQA, and TAU-Bench, often outperforming other open models like DeepSeek R1 and Qwen3-235B.", + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M1", + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m1", + "permaslug": "minimax/minimax-m2.1", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "MiniMax M1", - "slug": "minimax/minimax-m1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "minimax/minimax-m1", - "model_variant_slug": "minimax/minimax-m1", + "model_variant_permaslug": "minimax/minimax-m2.1", + "model_variant_slug": "minimax/minimax-m2.1", "moderation_required": false, - "name": "SiliconFlow | minimax/minimax-m1", + "name": "SiliconFlow | minimax/minimax-m2.1", "pricing": { - "completion": "0.0000022", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000055", - "request": "0", - "web_search": "0" + "prompt": "0.00000029" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -135956,7 +133660,7 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "MiniMaxAI/MiniMax-M1-80k", + "provider_model_id": "MiniMaxAI/MiniMax-M2.1", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", @@ -135967,53 +133671,62 @@ "temperature", "top_p", "top_k", - "frequency_penalty" + "frequency_penalty", + "response_format", + "structured_outputs", + "tools", + "tool_choice" ], "supports_multipart": false, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "MiniMaxAI/MiniMax-M2.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MiniMax: MiniMax M1", + "name": "MiniMax: MiniMax M2.1", "output_modalities": ["text"], - "permaslug": "minimax/minimax-m1", + "permaslug": "minimax/minimax-m2.1", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "MiniMax M1", - "slug": "minimax/minimax-m1", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "moonshotai", - "context_length": 131072, - "created_at": "2025-06-16T23:18:29.95979+00:00", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi-Dev-72B is an open-source large language model fine-tuned for software engineering and issue resolution tasks. Based on Qwen2.5-72B, it is optimized using large-scale reinforcement learning that applies code patches in real repositories and validates them via full test suite execution—rewarding only correct, robust completions. The model achieves 60.4% on SWE-bench Verified, setting a new benchmark among open-source models for software bug fixing and code reasoning.", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -136022,7 +133735,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true @@ -136036,7 +133748,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "c3cff673-64b4-4508-b8eb-ab38c44b3956", + "id": "daeaf61c-828e-4b64-8f84-71079c6cf28e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -136045,49 +133757,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "moonshotai", - "context_length": 131072, - "created_at": "2025-06-16T23:18:29.95979+00:00", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi-Dev-72B is an open-source large language model fine-tuned for software engineering and issue resolution tasks. Based on Qwen2.5-72B, it is optimized using large-scale reinforcement learning that applies code patches in real repositories and validates them via full test suite execution—rewarding only correct, robust completions. The model achieves 60.4% on SWE-bench Verified, setting a new benchmark among open-source models for software bug fixing and code reasoning.", - "features": null, + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-Dev-72B", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi Dev 72B", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-dev-72b", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-0905", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Kimi Dev 72B", - "slug": "moonshotai/kimi-dev-72b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-dev-72b", - "model_variant_slug": "moonshotai/kimi-dev-72b", + "model_variant_permaslug": "moonshotai/kimi-k2-0905", + "model_variant_slug": "moonshotai/kimi-k2-0905", "moderation_required": false, - "name": "SiliconFlow | moonshotai/kimi-dev-72b", + "name": "SiliconFlow | moonshotai/kimi-k2-0905", "pricing": { - "completion": "0.00000115", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000029", - "request": "0", - "web_search": "0" + "prompt": "0.0000004" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -136122,54 +133839,68 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "moonshotai/Kimi-Dev-72B", + "provider_model_id": "moonshotai/Kimi-K2-Instruct-0905", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "structured_outputs", "response_format", "temperature", "top_p", "top_k", - "frequency_penalty" + "frequency_penalty", + "tools", + "tool_choice" ], "supports_multipart": false, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": null, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-Dev-72B", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi Dev 72B", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-dev-72b", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-0905", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Kimi Dev 72B", - "slug": "moonshotai/kimi-dev-72b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "moonshotai", "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, @@ -136182,10 +133913,8 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -136195,7 +133924,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "daeaf61c-828e-4b64-8f84-71079c6cf28e", + "id": "02ca9089-557c-4a6e-bff9-977da0d2bd7a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -136210,53 +133939,53 @@ "model": { "author": "moonshotai", "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, + "created_at": "2025-11-06T14:50:22.752525+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "SiliconFlow | moonshotai/kimi-k2-0905", + "name": "SiliconFlow | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.000002", + "completion": "0.0000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000055" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -136291,60 +134020,63 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "moonshotai/Kimi-K2-Instruct-0905", + "provider_model_id": "moonshotai/Kimi-K2-Thinking", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", + "reasoning", + "include_reasoning", "temperature", "top_p", "top_k", "frequency_penalty", + "response_format", + "structured_outputs", "tools", "tool_choice" ], "supports_multipart": false, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "moonshotai/kimi-k2-thinking-20251106", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "moonshotai", "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -136352,7 +134084,7 @@ }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, @@ -136365,18 +134097,18 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "reasoning_return_mechanism": "reasoning-content", + "supports_multipart": false, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true + "literal_none": false, + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, "has_completions": false, - "id": "02ca9089-557c-4a6e-bff9-977da0d2bd7a", + "id": "34bb2131-6727-40e5-9dec-8793b254ba5c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -136391,7 +134123,7 @@ "model": { "author": "moonshotai", "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -136399,51 +134131,43 @@ }, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "SiliconFlow | moonshotai/kimi-k2-thinking-20251106", + "name": "SiliconFlow | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.0000025", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000055", - "request": "0", - "web_search": "0" + "prompt": "0.00000055" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -136478,7 +134202,7 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "moonshotai/Kimi-K2-Thinking", + "provider_model_id": "moonshotai/Kimi-K2.5", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", @@ -136504,31 +134228,29 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null } }, "group": "Other", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "permaslug": "moonshotai/kimi-k2.5-0127", "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null + "end_token": null, + "start_token": null }, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { @@ -136625,13 +134347,7 @@ "pricing": { "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -136816,12 +134532,7 @@ "pricing": { "completion": "0.00000045", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000005" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -136908,21 +134619,17 @@ "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -136931,7 +134638,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true @@ -136945,7 +134651,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "73e031d2-12fd-49a3-8284-44d902295c23", + "id": "ce835ea8-00c7-4686-831d-7733dac5e2d9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -136954,66 +134660,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 8192, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "SiliconFlow | openai/gpt-oss-20b", + "name": "SiliconFlow | qwen/qwen3-235b-a22b-07-25", "pricing": { - "completion": "0.00000018", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.00000009" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -137048,67 +134742,68 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "openai/gpt-oss-20b", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "structured_outputs", "response_format", "temperature", "top_p", "top_k", - "frequency_penalty" + "frequency_penalty", + "tools", + "tool_choice" ], "supports_multipart": false, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, - "default_stops": [], + "created_at": "2025-07-25T13:19:17.179049+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, @@ -137121,6 +134816,7 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true @@ -137134,7 +134830,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "ce835ea8-00c7-4686-831d-7733dac5e2d9", + "id": "1ad7f5ea-2471-4f32-a3d4-dc5f009a4763", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -137149,53 +134845,53 @@ "model": { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, - "default_stops": [], + "created_at": "2025-07-25T13:19:17.179049+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-235b-a22b-07-25", + "name": "SiliconFlow | qwen/qwen3-235b-a22b-thinking-2507", "pricing": { "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000013" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -137230,12 +134926,14 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "structured_outputs", "response_format", "temperature", @@ -137246,52 +134944,49 @@ "tool_choice" ], "supports_multipart": false, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-25T13:19:17.179049+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "created_at": "2025-07-29T16:36:05.687988+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, @@ -137304,7 +134999,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true @@ -137318,7 +135012,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "1ad7f5ea-2471-4f32-a3d4-dc5f009a4763", + "id": "73b51933-48c6-42d4-9440-2ce3c91ce131", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -137332,59 +135026,49 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-25T13:19:17.179049+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "context_length": 131072, + "created_at": "2025-07-29T16:36:05.687988+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": "Qwen/Qwen3-30B-A3B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "Qwen: Qwen3 30B A3B Instruct 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "permaslug": "qwen/qwen3-30b-a3b-instruct-2507", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "Qwen3 30B A3B Instruct 2507", + "slug": "qwen/qwen3-30b-a3b-instruct-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", - "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_permaslug": "qwen/qwen3-30b-a3b-instruct-2507", + "model_variant_slug": "qwen/qwen3-30b-a3b-instruct-2507", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-235b-a22b-thinking-2507", + "name": "SiliconFlow | qwen/qwen3-30b-a3b-instruct-2507", "pricing": { - "completion": "0.0000006", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000013", - "request": "0", - "web_search": "0" + "prompt": "0.00000009" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -137419,14 +135103,12 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "provider_model_id": "Qwen/Qwen3-30B-A3B-Instruct-2507", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "structured_outputs", "response_format", "temperature", @@ -137437,57 +135119,52 @@ "tool_choice" ], "supports_multipart": false, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": "Qwen/Qwen3-30B-A3B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "Qwen: Qwen3 30B A3B Instruct 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "permaslug": "qwen/qwen3-30b-a3b-instruct-2507", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "Qwen3 30B A3B Instruct 2507", + "slug": "qwen/qwen3-30b-a3b-instruct-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "context_length": 262144, + "created_at": "2025-08-28T16:39:52.539313+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -137496,6 +135173,7 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true @@ -137503,13 +135181,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, + "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": false, - "id": "1d89ef0d-9801-43b7-97a6-4a180031b605", + "id": "7092535c-d1a0-4b59-a132-17533a555d32", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -137524,17 +135202,12 @@ "model": { "author": "qwen", "context_length": 131072, - "created_at": "2025-04-28T22:16:44.177326+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "created_at": "2025-08-28T16:39:52.539313+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique ability to switch seamlessly between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile, high-quality performance.\n\nSignificantly outperforming prior models like QwQ and Qwen2.5, Qwen3 delivers superior mathematics, coding, commonsense reasoning, creative writing, and interactive dialogue capabilities. The Qwen3-30B-A3B variant includes 30.5 billion parameters (3.3 billion activated), 48 layers, 128 experts (8 activated per task), and supports up to 131K token contexts with YaRN, setting a new standard among open-source models.", + "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", @@ -137543,39 +135216,34 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "Qwen/Qwen3-30B-A3B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "Qwen: Qwen3 30B A3B Thinking 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "qwen/qwen3-30b-a3b-thinking-2507", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "Qwen3 30B A3B Thinking 2507", + "slug": "qwen/qwen3-30b-a3b-thinking-2507", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-04-28", - "model_variant_slug": "qwen/qwen3-30b-a3b", + "model_variant_permaslug": "qwen/qwen3-30b-a3b-thinking-2507", + "model_variant_slug": "qwen/qwen3-30b-a3b-thinking-2507", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-30b-a3b-04-28", + "name": "SiliconFlow | qwen/qwen3-30b-a3b-thinking-2507", "pricing": { - "completion": "0.00000045", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000009" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -137610,7 +135278,7 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-30B-A3B", + "provider_model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", @@ -137634,7 +135302,6 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", @@ -137643,38 +135310,38 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B", + "hf_slug": "Qwen/Qwen3-30B-A3B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B", + "name": "Qwen: Qwen3 30B A3B Thinking 2507", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-04-28", + "permaslug": "qwen/qwen3-30b-a3b-thinking-2507", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B", - "slug": "qwen/qwen3-30b-a3b", - "updated_at": "2026-01-08T19:57:57.475571+00:00", + "short_name": "Qwen3 30B A3B Thinking 2507", + "slug": "qwen/qwen3-30b-a3b-thinking-2507", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-29T16:36:05.687988+00:00", + "context_length": 131072, + "created_at": "2025-04-28T21:32:25.189881+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -137690,13 +135357,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, + "literal_required": false, "type_function": true } }, "has_chat_completions": true, "has_completions": false, - "id": "73b51933-48c6-42d4-9440-2ce3c91ce131", + "id": "6f9e09e5-e836-4de0-a27d-27c74b866001", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -137705,59 +135372,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", "context_length": 131072, - "created_at": "2025-07-29T16:36:05.687988+00:00", + "created_at": "2025-04-28T21:32:25.189881+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and agentic tool use. Post-trained on instruction data, it demonstrates competitive performance across reasoning (AIME, ZebraLogic), coding (MultiPL-E, LiveCodeBench), and alignment (IFEval, WritingBench) benchmarks. It outperforms its non-instruct variant on subjective and open-ended tasks while retaining strong factual and coding performance.", + "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", "features": { "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B Instruct 2507", + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-instruct-2507", + "permaslug": "qwen/qwen3-32b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Qwen3 30B A3B Instruct 2507", - "slug": "qwen/qwen3-30b-a3b-instruct-2507", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-instruct-2507", - "model_variant_slug": "qwen/qwen3-30b-a3b-instruct-2507", + "model_variant_permaslug": "qwen/qwen3-32b-04-28", + "model_variant_slug": "qwen/qwen3-32b", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-30b-a3b-instruct-2507", + "name": "SiliconFlow | qwen/qwen3-32b-04-28", "pricing": { - "completion": "0.0000003", + "completion": "0.00000057", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000014" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -137792,12 +135452,14 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "provider_model_id": "Qwen/Qwen3-32B", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "structured_outputs", "response_format", "temperature", @@ -137808,48 +135470,46 @@ "tool_choice" ], "supports_multipart": false, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "hf_slug": "Qwen/Qwen3-32B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B Instruct 2507", + "name": "Qwen: Qwen3 32B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-instruct-2507", + "permaslug": "qwen/qwen3-32b-04-28", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Qwen3 30B A3B Instruct 2507", - "slug": "qwen/qwen3-30b-a3b-instruct-2507", + "short_name": "Qwen3 32B", + "slug": "qwen/qwen3-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-08-28T16:39:52.539313+00:00", + "created_at": "2025-07-31T14:32:59.359308+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", + "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, @@ -137862,7 +135522,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true @@ -137876,7 +135535,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "7092535c-d1a0-4b59-a132-17533a555d32", + "id": "cc97b4c8-6ac2-438e-94f0-7b8b972c8c2f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -137885,59 +135544,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2025-08-28T16:39:52.539313+00:00", + "context_length": 0, + "created_at": "2025-07-31T14:32:59.359308+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated from final answers.\n\nCompared to earlier Qwen3-30B releases, this version improves performance across logical reasoning, mathematics, science, coding, and multilingual benchmarks. It also demonstrates stronger instruction following, tool use, and alignment with human preferences. With higher reasoning efficiency and extended output budgets, it is best suited for advanced research, competitive problem solving, and agentic applications requiring structured long-context reasoning.", + "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", "features": { "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B Thinking 2507", + "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-thinking-2507", + "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B Thinking 2507", - "slug": "qwen/qwen3-30b-a3b-thinking-2507", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Coder 30B A3B Instruct", + "slug": "qwen/qwen3-coder-30b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-30b-a3b-thinking-2507", - "model_variant_slug": "qwen/qwen3-30b-a3b-thinking-2507", + "model_variant_permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "model_variant_slug": "qwen/qwen3-coder-30b-a3b-instruct", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-30b-a3b-thinking-2507", + "name": "SiliconFlow | qwen/qwen3-coder-30b-a3b-instruct", "pricing": { - "completion": "0.0000003", + "completion": "0.00000028", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000009", - "request": "0", - "web_search": "0" + "prompt": "0.00000007" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -137972,14 +135626,12 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "provider_model_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "structured_outputs", "response_format", "temperature", @@ -137990,52 +135642,52 @@ "tool_choice" ], "supports_multipart": false, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 30B A3B Thinking 2507", + "name": "Qwen: Qwen3 Coder 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-30b-a3b-thinking-2507", + "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 30B A3B Thinking 2507", - "slug": "qwen/qwen3-30b-a3b-thinking-2507", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 Coder 30B A3B Instruct", + "slug": "qwen/qwen3-coder-30b-a3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", + "context_length": 262144, + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -138051,13 +135703,13 @@ "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": false, + "literal_required": true, "type_function": true } }, "has_chat_completions": true, "has_completions": false, - "id": "6f9e09e5-e836-4de0-a27d-27c74b866001", + "id": "9e1b542f-06a7-4d23-a5b4-615bdd78d952", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -138066,57 +135718,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:32:25.189881+00:00", + "context_length": 1048576, + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for tasks like math, coding, and logical inference, and a \"non-thinking\" mode for faster, general-purpose conversation. The model demonstrates strong performance in instruction-following, agent tool use, creative writing, and multilingual tasks across 100+ languages and dialects. It natively handles 32K token contexts and can extend to 131K tokens using YaRN-based scaling. ", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-32b-04-28", - "model_variant_slug": "qwen/qwen3-32b", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "model_variant_slug": "qwen/qwen3-coder", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-32b-04-28", + "name": "SiliconFlow | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { - "completion": "0.00000057", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000014", - "request": "0", - "web_search": "0" + "prompt": "0.00000025" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -138151,14 +135800,12 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-32B", + "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "structured_outputs", "response_format", "temperature", @@ -138169,50 +135816,56 @@ "tool_choice" ], "supports_multipart": false, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-32B", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 32B", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-32b-04-28", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "" + "end_token": null, + "start_token": null, + "system_prompt": null }, "router": null, - "short_name": "Qwen3 32B", - "slug": "qwen/qwen3-32b", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-31T14:32:59.359308+00:00", - "default_parameters": {}, + "context_length": 32768, + "created_at": "2025-10-28T19:43:42.126124+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", + "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -138221,10 +135874,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -138234,7 +135884,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "cc97b4c8-6ac2-438e-94f0-7b8b972c8c2f", + "id": "5fdf3b4c-4b00-46ca-98dd-630c06fc6fcc", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -138243,59 +135893,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 0, - "created_at": "2025-07-31T14:32:59.359308+00:00", - "default_parameters": {}, + "context_length": 32000, + "created_at": "2025-10-28T19:43:42.126124+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the Qwen3 architecture, it supports a native context length of 256K tokens (extendable to 1M with Yarn) and performs strongly in tasks involving function calls, browser use, and structured code completion.\n\nThis model is optimized for instruction-following without “thinking mode”, and integrates well with OpenAI-compatible tool-use formats. ", + "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "group": "Other", + "has_text_output": false, + "hf_slug": "Qwen/Qwen3-Embedding-8B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 30B A3B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "name": "Qwen: Qwen3 Embedding 8B", + "output_modalities": ["embeddings"], + "permaslug": "qwen/qwen3-embedding-8b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 30B A3B Instruct", - "slug": "qwen/qwen3-coder-30b-a3b-instruct", + "short_name": "Qwen3 Embedding 8B", + "slug": "qwen/qwen3-embedding-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-30b-a3b-instruct", - "model_variant_slug": "qwen/qwen3-coder-30b-a3b-instruct", + "model_variant_permaslug": "qwen/qwen3-embedding-8b", + "model_variant_slug": "qwen/qwen3-embedding-8b", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-coder-30b-a3b-instruct", + "name": "SiliconFlow | qwen/qwen3-embedding-8b", "pricing": { - "completion": "0.00000028", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000007", - "request": "0", - "web_search": "0" + "prompt": "0.00000004" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -138330,64 +135980,56 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "provider_model_id": "Qwen/Qwen3-Embedding-8B", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", - "supported_parameters": [ - "structured_outputs", - "response_format", - "temperature", - "top_p", - "top_k", - "frequency_penalty", - "tools", - "tool_choice" - ], + "supported_parameters": ["temperature", "top_p", "top_k", "frequency_penalty"], "supports_multipart": false, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "group": "Other", + "has_text_output": false, + "hf_slug": "Qwen/Qwen3-Embedding-8B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 30B A3B Instruct", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-30b-a3b-instruct", + "name": "Qwen: Qwen3 Embedding 8B", + "output_modalities": ["embeddings"], + "permaslug": "qwen/qwen3-embedding-8b", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 30B A3B Instruct", - "slug": "qwen/qwen3-coder-30b-a3b-instruct", + "short_name": "Qwen3 Embedding 8B", + "slug": "qwen/qwen3-embedding-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-23T00:29:06+00:00", + "created_at": "2025-09-11T17:36:53.6379+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, @@ -138413,7 +136055,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "9e1b542f-06a7-4d23-a5b4-615bdd78d952", + "id": "badd80a8-8638-4337-8710-bebf8c83a078", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -138427,12 +136069,12 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 1048576, - "created_at": "2025-07-23T00:29:06+00:00", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "features": { "reasoning_config": { "end_token": null, @@ -138442,39 +136084,34 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "model_variant_slug": "qwen/qwen3-coder", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-coder-480b-a35b-07-25", + "name": "SiliconFlow | qwen/qwen3-next-80b-a3b-instruct-2509", "pricing": { - "completion": "0.000001", + "completion": "0.0000014", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000025", - "request": "0", - "web_search": "0" + "prompt": "0.00000014" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -138509,7 +136146,7 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", @@ -138539,42 +136176,42 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 32768, - "created_at": "2025-10-28T19:43:42.126124+00:00", + "context_length": 262144, + "created_at": "2025-09-23T23:04:47+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 32768, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -138583,7 +136220,11 @@ "training": false }, "features": { - "supports_input_audio": false, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -138593,7 +136234,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "5fdf3b4c-4b00-46ca-98dd-630c06fc6fcc", + "id": "ff95af09-2a9d-4274-ba17-3829c5b8143b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -138602,65 +136243,58 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 262144, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 32000, - "created_at": "2025-10-28T19:43:42.126124+00:00", + "context_length": 131072, + "created_at": "2025-09-23T23:04:47+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, "default_stops": [], "default_system": null, - "description": "The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks. This series inherits the exceptional multilingual capabilities, long-text understanding, and reasoning skills of its foundational model. The Qwen3 Embedding series represents significant advancements in multiple text embedding and ranking tasks, including text retrieval, code retrieval, text classification, text clustering, and bitext mining.", + "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Other", - "has_text_output": false, - "hf_slug": "Qwen/Qwen3-Embedding-8B", + "group": "Qwen3", + "has_text_output": true, + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Embedding 8B", - "output_modalities": ["embeddings"], - "permaslug": "qwen/qwen3-embedding-8b", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "output_modalities": ["text"], + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Embedding 8B", - "slug": "qwen/qwen3-embedding-8b", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-embedding-8b", - "model_variant_slug": "qwen/qwen3-embedding-8b", + "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", + "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-embedding-8b", + "name": "SiliconFlow | qwen/qwen3-vl-235b-a22b-instruct", "pricing": { - "completion": "0", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000004", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -138695,56 +136329,68 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Embedding-8B", + "provider_model_id": "Qwen/Qwen3-VL-235B-A22B-Instruct", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", - "supported_parameters": ["temperature", "top_p", "top_k", "frequency_penalty"], - "supports_multipart": false, + "supported_parameters": [ + "structured_outputs", + "response_format", + "temperature", + "top_p", + "top_k", + "frequency_penalty", + "tools", + "tool_choice" + ], + "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Other", - "has_text_output": false, - "hf_slug": "Qwen/Qwen3-Embedding-8B", + "group": "Qwen3", + "has_text_output": true, + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Embedding 8B", - "output_modalities": ["embeddings"], - "permaslug": "qwen/qwen3-embedding-8b", + "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "output_modalities": ["text"], + "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 Embedding 8B", - "slug": "qwen/qwen3-embedding-8b", + "short_name": "Qwen3 VL 235B A22B Instruct", + "slug": "qwen/qwen3-vl-235b-a22b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", - "default_parameters": {}, + "created_at": "2025-09-23T23:04:50+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, @@ -138757,10 +136403,12 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true }, + "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -138770,7 +136418,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "badd80a8-8638-4337-8710-bebf8c83a078", + "id": "37f02013-5c66-4800-bf34-dac4804abd14", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -138784,54 +136432,53 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:36:53.6379+00:00", - "default_parameters": {}, + "context_length": 131072, + "created_at": "2025-09-23T23:04:50+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.8, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", + "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", "features": { "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "name": "Qwen: Qwen3 VL 235B A22B Thinking", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "permaslug": "qwen/qwen3-vl-235b-a22b-thinking", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 VL 235B A22B Thinking", + "slug": "qwen/qwen3-vl-235b-a22b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct", + "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-thinking", + "model_variant_slug": "qwen/qwen3-vl-235b-a22b-thinking", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-next-80b-a3b-instruct-2509", + "name": "SiliconFlow | qwen/qwen3-vl-235b-a22b-thinking", "pricing": { - "completion": "0.0000014", + "completion": "0.0000035", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000014", - "request": "0", - "web_search": "0" + "prompt": "0.00000045" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -138866,12 +136513,14 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "provider_model_id": "Qwen/Qwen3-VL-235B-A22B-Thinking", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ + "reasoning", + "include_reasoning", "structured_outputs", "response_format", "temperature", @@ -138881,45 +136530,45 @@ "tools", "tool_choice" ], - "supports_multipart": false, - "supports_reasoning": false, + "supports_multipart": true, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Thinking", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Instruct", + "name": "Qwen: Qwen3 VL 235B A22B Thinking", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", + "permaslug": "qwen/qwen3-vl-235b-a22b-thinking", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Instruct", - "slug": "qwen/qwen3-next-80b-a3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 VL 235B A22B Thinking", + "slug": "qwen/qwen3-vl-235b-a22b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-09-23T23:04:47+00:00", + "created_at": "2025-10-06T23:47:56.430294+00:00", "default_parameters": { "frequency_penalty": null, "temperature": 0.7, @@ -138927,7 +136576,7 @@ }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, @@ -138954,7 +136603,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "ff95af09-2a9d-4274-ba17-3829c5b8143b", + "id": "1553890e-99fa-45d4-b77c-132776f6cac6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -138968,8 +136617,8 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:47+00:00", + "context_length": 262144, + "created_at": "2025-10-06T23:47:56.430294+00:00", "default_parameters": { "frequency_penalty": null, "temperature": 0.7, @@ -138977,8 +136626,9 @@ }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -138987,39 +136637,34 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-instruct", - "model_variant_slug": "qwen/qwen3-vl-235b-a22b-instruct", + "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "model_variant_slug": "qwen/qwen3-vl-30b-a3b-instruct", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-vl-235b-a22b-instruct", + "name": "SiliconFlow | qwen/qwen3-vl-30b-a3b-instruct", "pricing": { - "completion": "0.0000015", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.00000029" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -139054,7 +136699,7 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "provider_model_id": "Qwen/Qwen3-VL-30B-A3B-Instruct", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", @@ -139076,6 +136721,7 @@ "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -139084,38 +136730,37 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Instruct", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Instruct", + "name": "Qwen: Qwen3 VL 30B A3B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-instruct", + "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 235B A22B Instruct", - "slug": "qwen/qwen3-vl-235b-a22b-instruct", + "short_name": "Qwen3 VL 30B A3B Instruct", + "slug": "qwen/qwen3-vl-30b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-09-23T23:04:50+00:00", + "created_at": "2025-10-06T23:47:59.575824+00:00", "default_parameters": { - "frequency_penalty": null, "temperature": 0.8, "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, @@ -139143,7 +136788,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "37f02013-5c66-4800-bf34-dac4804abd14", + "id": "775d37c0-03e1-435f-aefa-d7252f4b1c56", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -139157,16 +136802,15 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2025-09-23T23:04:50+00:00", + "context_length": 0, + "created_at": "2025-10-06T23:47:59.575824+00:00", "default_parameters": { - "frequency_penalty": null, "temperature": 0.8, "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", + "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", "features": { "reasoning_config": { "end_token": "", @@ -139176,39 +136820,34 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Thinking", + "name": "Qwen: Qwen3 VL 30B A3B Thinking", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-thinking", + "permaslug": "qwen/qwen3-vl-30b-a3b-thinking", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 235B A22B Thinking", - "slug": "qwen/qwen3-vl-235b-a22b-thinking", + "short_name": "Qwen3 VL 30B A3B Thinking", + "slug": "qwen/qwen3-vl-30b-a3b-thinking", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-235b-a22b-thinking", - "model_variant_slug": "qwen/qwen3-vl-235b-a22b-thinking", + "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-thinking", + "model_variant_slug": "qwen/qwen3-vl-30b-a3b-thinking", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-vl-235b-a22b-thinking", + "name": "SiliconFlow | qwen/qwen3-vl-30b-a3b-thinking", "pricing": { - "completion": "0.0000035", + "completion": "0.000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000045", - "request": "0", - "web_search": "0" + "prompt": "0.00000029" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -139243,7 +136882,7 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "provider_model_id": "Qwen/Qwen3-VL-30B-A3B-Thinking", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", @@ -139275,42 +136914,38 @@ }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-235B-A22B-Thinking", + "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 235B A22B Thinking", + "name": "Qwen: Qwen3 VL 30B A3B Thinking", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-235b-a22b-thinking", + "permaslug": "qwen/qwen3-vl-30b-a3b-thinking", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 235B A22B Thinking", - "slug": "qwen/qwen3-vl-235b-a22b-thinking", + "short_name": "Qwen3 VL 30B A3B Thinking", + "slug": "qwen/qwen3-vl-30b-a3b-thinking", "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-06T23:47:56.430294+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, - "default_stops": [], + "context_length": 131072, + "created_at": "2025-03-05T21:06:54.875499+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -139319,21 +136954,17 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_multipart": true, + "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, - "literal_required": true, + "literal_required": false, "type_function": true } }, "has_chat_completions": true, "has_completions": false, - "id": "1553890e-99fa-45d4-b77c-132776f6cac6", + "id": "3911953b-d9a4-4cb5-ba26-dd45c9cbee89", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -139342,64 +136973,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-06T23:47:56.430294+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 - }, - "default_stops": [], + "context_length": 131072, + "created_at": "2025-03-05T21:06:54.875499+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "Qwen3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "hf_slug": "Qwen/QwQ-32B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "qwq", "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "name": "Qwen: QwQ 32B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "permaslug": "qwen/qwq-32b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Qwen3 VL 30B A3B Instruct", - "slug": "qwen/qwen3-vl-30b-a3b-instruct", + "short_name": "QwQ 32B", + "slug": "qwen/qwq-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-instruct", - "model_variant_slug": "qwen/qwen3-vl-30b-a3b-instruct", + "model_variant_permaslug": "qwen/qwq-32b", + "model_variant_slug": "qwen/qwq-32b", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-vl-30b-a3b-instruct", + "name": "SiliconFlow | qwen/qwq-32b", "pricing": { - "completion": "0.000001", + "completion": "0.00000058", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000029", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -139434,14 +137053,14 @@ "slug": "siliconflow", "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "provider_model_id": "Qwen/QwQ-32B", "provider_name": "SiliconFlow", "provider_region": null, "provider_slug": "siliconflow/fp8", "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", + "reasoning", + "include_reasoning", "temperature", "top_p", "top_k", @@ -139449,57 +137068,51 @@ "tools", "tool_choice" ], - "supports_multipart": true, - "supports_reasoning": false, + "supports_multipart": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" } }, - "group": "Qwen3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Instruct", + "hf_slug": "Qwen/QwQ-32B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, + "input_modalities": ["text"], + "instruct_type": "qwq", "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Instruct", + "name": "Qwen: QwQ 32B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-instruct", + "permaslug": "qwen/qwq-32b", "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null + "end_token": "", + "start_token": "" }, "router": null, - "short_name": "Qwen3 VL 30B A3B Instruct", - "slug": "qwen/qwen3-vl-30b-a3b-instruct", + "short_name": "QwQ 32B", + "slug": "qwen/qwq-32b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-06T23:47:59.575824+00:00", - "default_parameters": { - "temperature": 0.8, - "top_p": 0.95 - }, + "author": "tencent", + "context_length": 131072, + "created_at": "2025-07-08T15:14:24.006774+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. It offers competitive benchmark performance across mathematics, science, coding, and multi-turn reasoning tasks, while maintaining high inference efficiency via Grouped Query Attention (GQA) and quantization support (FP8, GPTQ, etc.).", "endpoint": { "adapter_name": "SiliconFlowAdapter", "can_abort": false, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", @@ -139508,12 +137121,10 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "supported_parameters": { "response_format": true, "structured_outputs": true }, - "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -139523,7 +137134,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "775d37c0-03e1-435f-aefa-d7252f4b1c56", + "id": "519e0e1e-7254-41de-abe1-2e0ebf026ce9", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -139532,20 +137143,17 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 262144, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 0, - "created_at": "2025-10-06T23:47:59.575824+00:00", - "default_parameters": { - "temperature": 0.8, - "top_p": 0.95 - }, + "author": "tencent", + "context_length": 32768, + "created_at": "2025-07-08T15:14:24.006774+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", + "description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. It offers competitive benchmark performance across mathematics, science, coding, and multi-turn reasoning tasks, while maintaining high inference efficiency via Grouped Query Attention (GQA) and quantization support (FP8, GPTQ, etc.).", "features": { "reasoning_config": { "end_token": "", @@ -139553,577 +137161,36 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Thinking", + "hf_slug": "tencent/Hunyuan-A13B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Thinking", + "name": "Tencent: Hunyuan A13B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-thinking", + "permaslug": "tencent/hunyuan-a13b-instruct", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 30B A3B Thinking", - "slug": "qwen/qwen3-vl-30b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Hunyuan A13B Instruct", + "slug": "tencent/hunyuan-a13b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-30b-a3b-thinking", - "model_variant_slug": "qwen/qwen3-vl-30b-a3b-thinking", + "model_variant_permaslug": "tencent/hunyuan-a13b-instruct", + "model_variant_slug": "tencent/hunyuan-a13b-instruct", "moderation_required": false, - "name": "SiliconFlow | qwen/qwen3-vl-30b-a3b-thinking", + "name": "SiliconFlow | tencent/hunyuan-a13b-instruct", "pricing": { - "completion": "0.000001", + "completion": "0.00000057", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000029", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "SiliconFlow", - "provider_info": { - "adapterName": "SiliconFlowAdapter", - "baseUrl": "https://api.siliconflow.com/v1", - "byokEnabled": false, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://docs.siliconflow.com/en/legals/terms-of-service", - "training": false - }, - "displayName": "SiliconFlow", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "SG", - "icon": { - "url": "/images/icons/SiliconFlow.svg" - }, - "ignoredProviderModels": [ - "inclusionAI/Ling-mini-2.0", - "inclusionAI/Ring-flash-2.0", - "inclusionAI/Ling-flash-2.0" - ], - "isAbortable": false, - "isMultipartSupported": false, - "moderationRequired": false, - "name": "SiliconFlow", - "owners": ["{}"], - "slug": "siliconflow", - "statusPageUrl": null - }, - "provider_model_id": "Qwen/Qwen3-VL-30B-A3B-Thinking", - "provider_name": "SiliconFlow", - "provider_region": null, - "provider_slug": "siliconflow/fp8", - "quantization": "fp8", - "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", - "temperature", - "top_p", - "top_k", - "frequency_penalty", - "tools", - "tool_choice" - ], - "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Qwen3", - "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-30B-A3B-Thinking", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 30B A3B Thinking", - "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-30b-a3b-thinking", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, - "router": null, - "short_name": "Qwen3 VL 30B A3B Thinking", - "slug": "qwen/qwen3-vl-30b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", - "warning_message": null - }, - { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-03-05T21:06:54.875499+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], - "default_system": null, - "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", - "endpoint": { - "adapter_name": "SiliconFlowAdapter", - "can_abort": false, - "context_length": 131072, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://docs.siliconflow.com/en/legals/terms-of-service", - "training": false - }, - "features": { - "is_mandatory_reasoning": true, - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": false, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": false, - "id": "3911953b-d9a4-4cb5-ba26-dd45c9cbee89", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 131072, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2025-03-05T21:06:54.875499+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], - "default_system": null, - "description": "QwQ is the reasoning model of the Qwen series. Compared with conventional instruction-tuned models, QwQ, which is capable of thinking and reasoning, can achieve significantly enhanced performance in downstream tasks, especially hard problems. QwQ-32B is the medium-sized reasoning model, which is capable of achieving competitive performance against state-of-the-art reasoning models, e.g., DeepSeek-R1, o1-mini.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen", - "has_text_output": true, - "hf_slug": "Qwen/QwQ-32B", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwq", - "model_version_group_id": null, - "name": "Qwen: QwQ 32B", - "output_modalities": ["text"], - "permaslug": "qwen/qwq-32b", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, - "router": null, - "short_name": "QwQ 32B", - "slug": "qwen/qwq-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "qwen/qwq-32b", - "model_variant_slug": "qwen/qwq-32b", - "moderation_required": false, - "name": "SiliconFlow | qwen/qwq-32b", - "pricing": { - "completion": "0.00000058", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "SiliconFlow", - "provider_info": { - "adapterName": "SiliconFlowAdapter", - "baseUrl": "https://api.siliconflow.com/v1", - "byokEnabled": false, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://docs.siliconflow.com/en/legals/terms-of-service", - "training": false - }, - "displayName": "SiliconFlow", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "SG", - "icon": { - "url": "/images/icons/SiliconFlow.svg" - }, - "ignoredProviderModels": [ - "inclusionAI/Ling-mini-2.0", - "inclusionAI/Ring-flash-2.0", - "inclusionAI/Ling-flash-2.0" - ], - "isAbortable": false, - "isMultipartSupported": false, - "moderationRequired": false, - "name": "SiliconFlow", - "owners": ["{}"], - "slug": "siliconflow", - "statusPageUrl": null - }, - "provider_model_id": "Qwen/QwQ-32B", - "provider_name": "SiliconFlow", - "provider_region": null, - "provider_slug": "siliconflow/fp8", - "quantization": "fp8", - "supported_parameters": [ - "reasoning", - "include_reasoning", - "temperature", - "top_p", - "top_k", - "frequency_penalty", - "tools", - "tool_choice" - ], - "supports_multipart": false, - "supports_reasoning": true, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, - "group": "Qwen", - "has_text_output": true, - "hf_slug": "Qwen/QwQ-32B", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwq", - "model_version_group_id": null, - "name": "Qwen: QwQ 32B", - "output_modalities": ["text"], - "permaslug": "qwen/qwq-32b", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, - "router": null, - "short_name": "QwQ 32B", - "slug": "qwen/qwq-32b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - { - "author": "stepfun-ai", - "context_length": 65536, - "created_at": "2025-08-28T21:09:35.282323+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "Step3 is a cutting-edge multimodal reasoning model—built on a Mixture-of-Experts architecture with 321B total parameters and 38B active. It is designed end-to-end to minimize decoding costs while delivering top-tier performance in vision–language reasoning. Through the co-design of Multi-Matrix Factorization Attention (MFA) and Attention-FFN Disaggregation (AFD), Step3 maintains exceptional efficiency across both flagship and low-end accelerators.", - "endpoint": { - "adapter_name": "SiliconFlowAdapter", - "can_abort": false, - "context_length": 65536, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://docs.siliconflow.com/en/legals/terms-of-service", - "training": false - }, - "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_multipart": true, - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": false, - "id": "d07c4675-6f77-4783-b2ab-2123a5064d2c", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 65536, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "stepfun-ai", - "context_length": 65536, - "created_at": "2025-08-28T21:09:35.282323+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "Step3 is a cutting-edge multimodal reasoning model—built on a Mixture-of-Experts architecture with 321B total parameters and 38B active. It is designed end-to-end to minimize decoding costs while delivering top-tier performance in vision–language reasoning. Through the co-design of Multi-Matrix Factorization Attention (MFA) and Attention-FFN Disaggregation (AFD), Step3 maintains exceptional efficiency across both flagship and low-end accelerators.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": "stepfun-ai/step3", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "StepFun: Step3", - "output_modalities": ["text"], - "permaslug": "stepfun-ai/step3", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Step3", - "slug": "stepfun-ai/step3", - "updated_at": "2026-01-08T19:23:52.555156+00:00", - "warning_message": null - }, - "model_variant_permaslug": "stepfun-ai/step3", - "model_variant_slug": "stepfun-ai/step3", - "moderation_required": false, - "name": "SiliconFlow | stepfun-ai/step3", - "pricing": { - "completion": "0.00000142", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000057", - "request": "0", - "web_search": "0" - }, - "provider_display_name": "SiliconFlow", - "provider_info": { - "adapterName": "SiliconFlowAdapter", - "baseUrl": "https://api.siliconflow.com/v1", - "byokEnabled": false, - "dataPolicy": { - "canPublish": false, - "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://docs.siliconflow.com/en/legals/terms-of-service", - "training": false - }, - "displayName": "SiliconFlow", - "editors": ["{}"], - "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "SG", - "icon": { - "url": "/images/icons/SiliconFlow.svg" - }, - "ignoredProviderModels": [ - "inclusionAI/Ling-mini-2.0", - "inclusionAI/Ring-flash-2.0", - "inclusionAI/Ling-flash-2.0" - ], - "isAbortable": false, - "isMultipartSupported": false, - "moderationRequired": false, - "name": "SiliconFlow", - "owners": ["{}"], - "slug": "siliconflow", - "statusPageUrl": null - }, - "provider_model_id": "stepfun-ai/step3", - "provider_name": "SiliconFlow", - "provider_region": null, - "provider_slug": "siliconflow/fp8", - "quantization": "fp8", - "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", - "temperature", - "top_p", - "top_k", - "frequency_penalty", - "tools", - "tool_choice" - ], - "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, - "variable_pricings": [], - "variant": "standard" - }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": "stepfun-ai/step3", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["image", "text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "StepFun: Step3", - "output_modalities": ["text"], - "permaslug": "stepfun-ai/step3", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, - "router": null, - "short_name": "Step3", - "slug": "stepfun-ai/step3", - "updated_at": "2026-01-08T19:23:52.555156+00:00", - "warning_message": null - }, - { - "author": "tencent", - "context_length": 131072, - "created_at": "2025-07-08T15:14:24.006774+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. It offers competitive benchmark performance across mathematics, science, coding, and multi-turn reasoning tasks, while maintaining high inference efficiency via Grouped Query Attention (GQA) and quantization support (FP8, GPTQ, etc.).", - "endpoint": { - "adapter_name": "SiliconFlowAdapter", - "can_abort": false, - "context_length": 131072, - "data_policy": { - "canPublish": false, - "privacyPolicyURL": "https://docs.siliconflow.com/en/legals/privacy-policy", - "retainsPrompts": false, - "termsOfServiceURL": "https://docs.siliconflow.com/en/legals/terms-of-service", - "training": false - }, - "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_tool_choice": { - "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } - }, - "has_chat_completions": true, - "has_completions": false, - "id": "519e0e1e-7254-41de-abe1-2e0ebf026ce9", - "is_byok": false, - "is_deranked": false, - "is_disabled": false, - "is_free": false, - "is_hidden": false, - "limit_rpd": null, - "limit_rpm": null, - "limit_rpm_cf": null, - "max_completion_tokens": 131072, - "max_prompt_tokens": null, - "max_tokens_per_image": null, - "model": { - "author": "tencent", - "context_length": 32768, - "created_at": "2025-07-08T15:14:24.006774+00:00", - "default_parameters": {}, - "default_stops": [], - "default_system": null, - "description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. It offers competitive benchmark performance across mathematics, science, coding, and multi-turn reasoning tasks, while maintaining high inference efficiency via Grouped Query Attention (GQA) and quantization support (FP8, GPTQ, etc.).", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", - "has_text_output": true, - "hf_slug": "tencent/Hunyuan-A13B-Instruct", - "hf_updated_at": null, - "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Tencent: Hunyuan A13B Instruct", - "output_modalities": ["text"], - "permaslug": "tencent/hunyuan-a13b-instruct", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, - "router": null, - "short_name": "Hunyuan A13B Instruct", - "slug": "tencent/hunyuan-a13b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - }, - "model_variant_permaslug": "tencent/hunyuan-a13b-instruct", - "model_variant_slug": "tencent/hunyuan-a13b-instruct", - "moderation_required": false, - "name": "SiliconFlow | tencent/hunyuan-a13b-instruct", - "pricing": { - "completion": "0.00000057", - "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000014", - "request": "0", - "web_search": "0" + "prompt": "0.00000014" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -140232,6 +137299,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -140301,12 +137369,7 @@ "pricing": { "completion": "0.00000086", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000014", - "request": "0", - "web_search": "0" + "prompt": "0.00000014" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -140413,6 +137476,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supported_parameters": { "response_format": true, "structured_outputs": true @@ -140487,12 +137551,7 @@ "pricing": { "completion": "0.0000019", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000039", - "request": "0", - "web_search": "0" + "prompt": "0.00000039" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -140602,6 +137661,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_base64_video_input": false, "supports_multipart": true, "supports_tool_choice": { @@ -140675,13 +137735,7 @@ "pricing": { "completion": "0.0000009", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -140861,12 +137915,7 @@ "pricing": { "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000042", - "request": "0", - "web_search": "0" + "prompt": "0.00000042" }, "provider_display_name": "SiliconFlow", "provider_info": { @@ -140966,6 +138015,169 @@ "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.sourceful.com&size=256" }, "models": [ + { + "author": "sourceful", + "context_length": 8192, + "created_at": "2026-02-02T16:57:03+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "Riverflow V2 Fast is the fastest variant of Sourceful's Riverflow 2.0 lineup, best for production deployments and latency-critical workflows.\n\nThe Riverflow 2.0 series represents SOTA performance on image generation and editing tasks, using an integrated reasoning model to boost reliability and tackle complex challenges.\n\nPricing is $0.02 per 1K output image and $0.04 per 2K output image. Does not support 4K image output.\n\nAdditional features:\n- Custom font rendering via font_inputs ($0.03/font, max 2)\n- Image enhancement via super_resolution_references ($0.20/reference, max 4)\n\nSee the image generation docs for details: https://openrouter.ai/docs/features/multimodal/image-generation\n\nNote: Sourceful imposes a 4.5MB request size limit, therefore it is highly recommended to pass image URLs instead of Base64 data.", + "endpoint": { + "adapter_name": "SourcefulV2Adapter", + "can_abort": false, + "context_length": 8192, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://www.sourceful.com/legal/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.sourceful.com/legal/spring-terms-of-use", + "training": false + }, + "features": { + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "cf13168f-ca5a-498b-a260-d9127604db55", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "sourceful", + "context_length": 8192, + "created_at": "2026-02-02T16:57:03+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "Riverflow V2 Fast is the fastest variant of Sourceful's Riverflow 2.0 lineup, best for production deployments and latency-critical workflows.\n\nThe Riverflow 2.0 series represents SOTA performance on image generation and editing tasks, using an integrated reasoning model to boost reliability and tackle complex challenges.\n\nPricing is $0.02 per 1K output image and $0.04 per 2K output image. Does not support 4K image output.\n\nAdditional features:\n- Custom font rendering via font_inputs ($0.03/font, max 2)\n- Image enhancement via super_resolution_references ($0.20/reference, max 4)\n\nSee the image generation docs for details: https://openrouter.ai/docs/features/multimodal/image-generation\n\nNote: Sourceful imposes a 4.5MB request size limit, therefore it is highly recommended to pass image URLs instead of Base64 data.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": false, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Sourceful: Riverflow V2 Fast", + "output_modalities": ["image"], + "permaslug": "sourceful/riverflow-v2-fast-20260130", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Riverflow V2 Fast", + "slug": "sourceful/riverflow-v2-fast", + "updated_at": "2026-02-07T03:45:40.866152+00:00", + "warning_message": null + }, + "model_variant_permaslug": "sourceful/riverflow-v2-fast-20260130", + "model_variant_slug": "sourceful/riverflow-v2-fast", + "moderation_required": false, + "name": "Sourceful | sourceful/riverflow-v2-fast-20260130", + "pricing": { + "completion": "0", + "discount": 0, + "image_output": "0.00000479041916167665", + "prompt": "0" + }, + "provider_display_name": "Sourceful", + "provider_info": { + "adapterName": "SourcefulV2Adapter", + "baseUrl": "https://design-api.sourceful.com/v2", + "byokEnabled": false, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://www.sourceful.com/legal/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.sourceful.com/legal/spring-terms-of-use", + "training": false + }, + "displayName": "Sourceful", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": false, + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.sourceful.com&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Sourceful", + "owners": [], + "slug": "sourceful", + "statusPageUrl": null + }, + "provider_model_id": "riverflow-2-fast", + "provider_name": "Sourceful", + "provider_region": null, + "provider_slug": "sourceful", + "quantization": "unknown", + "supported_parameters": [], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": false, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": false, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Sourceful: Riverflow V2 Fast", + "output_modalities": ["image"], + "permaslug": "sourceful/riverflow-v2-fast-20260130", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Riverflow V2 Fast", + "slug": "sourceful/riverflow-v2-fast", + "updated_at": "2026-02-07T03:45:40.866152+00:00", + "warning_message": null + }, { "author": "sourceful", "context_length": 8192, @@ -141052,7 +138264,7 @@ "router": null, "short_name": "Riverflow V2 Fast Preview", "slug": "sourceful/riverflow-v2-fast-preview", - "updated_at": "2025-12-09T14:33:14.431363+00:00", + "updated_at": "2026-02-07T03:46:11.986235+00:00", "warning_message": null }, "model_variant_permaslug": "sourceful/riverflow-v2-fast-preview", @@ -141060,14 +138272,10 @@ "moderation_required": false, "name": "Sourceful | sourceful/riverflow-v2-fast-preview", "pricing": { - "completion": "0.00000719", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0.00000719", - "internal_reasoning": "0", - "prompt": "0", - "request": "0", - "web_search": "0" + "image_output": "0.00000718562874251497", + "prompt": "0" }, "provider_display_name": "Sourceful", "provider_info": { @@ -141136,7 +138344,7 @@ "router": null, "short_name": "Riverflow V2 Fast Preview", "slug": "sourceful/riverflow-v2-fast-preview", - "updated_at": "2025-12-09T14:33:14.431363+00:00", + "updated_at": "2026-02-07T03:46:11.986235+00:00", "warning_message": null }, { @@ -141225,7 +138433,7 @@ "router": null, "short_name": "Riverflow V2 Max Preview", "slug": "sourceful/riverflow-v2-max-preview", - "updated_at": "2025-12-09T14:33:09.632974+00:00", + "updated_at": "2026-02-07T03:45:50.814082+00:00", "warning_message": null }, "model_variant_permaslug": "sourceful/riverflow-v2-max-preview", @@ -141233,14 +138441,10 @@ "moderation_required": false, "name": "Sourceful | sourceful/riverflow-v2-max-preview", "pricing": { - "completion": "0.00001796", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0.00001796", - "internal_reasoning": "0", - "prompt": "0", - "request": "0", - "web_search": "0" + "image_output": "0.00001796407185628743", + "prompt": "0" }, "provider_display_name": "Sourceful", "provider_info": { @@ -141309,13 +138513,13 @@ "router": null, "short_name": "Riverflow V2 Max Preview", "slug": "sourceful/riverflow-v2-max-preview", - "updated_at": "2025-12-09T14:33:09.632974+00:00", + "updated_at": "2026-02-07T03:45:50.814082+00:00", "warning_message": null }, { "author": "sourceful", "context_length": 8192, - "created_at": "2025-12-08T23:50:36+00:00", + "created_at": "2026-02-02T16:57:07+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -141323,9 +138527,9 @@ }, "default_stops": [], "default_system": null, - "description": "Riverflow V2 Standard Preview is the standard variant of Sourceful's Riverflow V2 preview lineup. This preview version exceeds the performance of Riverflow 1 Family and is Sourceful's first unified text-to-image and image-to-image model family.\n\nPricing is $0.035 per output image, regardless of size.\n\nSourceful imposes a 4.5MB request size limit, therefore it is highly recommended to pass image URLs instead of Base64 data.", + "description": "Riverflow V2 Pro is the most powerful variant of Sourceful's Riverflow 2.0 lineup, best for top-tier control and perfect text rendering.\n\nThe Riverflow 2.0 series represents SOTA performance on image generation and editing tasks, using an integrated reasoning model to boost reliability and tackle complex challenges.\n\nPricing is $0.15 per 1K/2K output image and $0.33 per 4K output image.\n\nAdditional features:\n- Custom font rendering via font_inputs ($0.03/font, max 2)\n- Image enhancement via super_resolution_references ($0.20/reference, max 4)\n\nSee the image generation docs for details: https://openrouter.ai/docs/features/multimodal/image-generation\n\nNote: Sourceful imposes a 4.5MB request size limit, therefore it is highly recommended to pass image URLs instead of Base64 data.", "endpoint": { - "adapter_name": "SourcefulAdapter", + "adapter_name": "SourcefulV2Adapter", "can_abort": false, "context_length": 8192, "data_policy": { @@ -141336,8 +138540,6 @@ "training": false }, "features": { - "supports_input_audio": false, - "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -141347,7 +138549,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "69bbbbea-d1e2-47ed-8d2a-ee2e9a329005", + "id": "3b180cbc-27de-4246-b5bc-332adcdda392", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -141362,7 +138564,7 @@ "model": { "author": "sourceful", "context_length": 8192, - "created_at": "2025-12-08T23:50:36+00:00", + "created_at": "2026-02-02T16:57:07+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -141370,13 +138572,12 @@ }, "default_stops": [], "default_system": null, - "description": "Riverflow V2 Standard Preview is the standard variant of Sourceful's Riverflow V2 preview lineup. This preview version exceeds the performance of Riverflow 1 Family and is Sourceful's first unified text-to-image and image-to-image model family.\n\nPricing is $0.035 per output image, regardless of size.\n\nSourceful imposes a 4.5MB request size limit, therefore it is highly recommended to pass image URLs instead of Base64 data.", + "description": "Riverflow V2 Pro is the most powerful variant of Sourceful's Riverflow 2.0 lineup, best for top-tier control and perfect text rendering.\n\nThe Riverflow 2.0 series represents SOTA performance on image generation and editing tasks, using an integrated reasoning model to boost reliability and tackle complex challenges.\n\nPricing is $0.15 per 1K/2K output image and $0.33 per 4K output image.\n\nAdditional features:\n- Custom font rendering via font_inputs ($0.03/font, max 2)\n- Image enhancement via super_resolution_references ($0.20/reference, max 4)\n\nSee the image generation docs for details: https://openrouter.ai/docs/features/multimodal/image-generation\n\nNote: Sourceful imposes a 4.5MB request size limit, therefore it is highly recommended to pass image URLs instead of Base64 data.", "features": { "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, "group": "Other", @@ -141387,38 +138588,33 @@ "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Sourceful: Riverflow V2 Standard Preview", + "name": "Sourceful: Riverflow V2 Pro", "output_modalities": ["image"], - "permaslug": "sourceful/riverflow-v2-standard-preview", + "permaslug": "sourceful/riverflow-v2-pro-20260130", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "Riverflow V2 Standard Preview", - "slug": "sourceful/riverflow-v2-standard-preview", - "updated_at": "2025-12-09T14:33:12.112144+00:00", + "short_name": "Riverflow V2 Pro", + "slug": "sourceful/riverflow-v2-pro", + "updated_at": "2026-02-07T03:45:28.696144+00:00", "warning_message": null }, - "model_variant_permaslug": "sourceful/riverflow-v2-standard-preview", - "model_variant_slug": "sourceful/riverflow-v2-standard-preview", + "model_variant_permaslug": "sourceful/riverflow-v2-pro-20260130", + "model_variant_slug": "sourceful/riverflow-v2-pro", "moderation_required": false, - "name": "Sourceful | sourceful/riverflow-v2-standard-preview", + "name": "Sourceful | sourceful/riverflow-v2-pro-20260130", "pricing": { - "completion": "0.00000838", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0.00000838", - "internal_reasoning": "0", - "prompt": "0", - "request": "0", - "web_search": "0" + "image_output": "0.00003592814371257485", + "prompt": "0" }, "provider_display_name": "Sourceful", "provider_info": { - "adapterName": "SourcefulAdapter", - "baseUrl": "https://design-api.sourceful.com/v1", + "adapterName": "SourcefulV2Adapter", + "baseUrl": "https://design-api.sourceful.com/v2", "byokEnabled": false, "dataPolicy": { "canPublish": false, @@ -141443,7 +138639,7 @@ "slug": "sourceful", "statusPageUrl": null }, - "provider_model_id": "sourceful/riverflow-v2-standard-preview", + "provider_model_id": "riverflow-2-pro", "provider_name": "Sourceful", "provider_region": null, "provider_slug": "sourceful", @@ -141459,8 +138655,7 @@ "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, "group": "Other", @@ -141471,58 +138666,23 @@ "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Sourceful: Riverflow V2 Standard Preview", + "name": "Sourceful: Riverflow V2 Pro", "output_modalities": ["image"], - "permaslug": "sourceful/riverflow-v2-standard-preview", + "permaslug": "sourceful/riverflow-v2-pro-20260130", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "Riverflow V2 Standard Preview", - "slug": "sourceful/riverflow-v2-standard-preview", - "updated_at": "2025-12-09T14:33:12.112144+00:00", + "short_name": "Riverflow V2 Pro", + "slug": "sourceful/riverflow-v2-pro", + "updated_at": "2026-02-07T03:45:28.696144+00:00", "warning_message": null - } - ], - "name": "Sourceful", - "slug": "sourceful" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": true - }, - "datacenters": ["Global"], - "displayName": "Stealth", - "headquarters": "Unknown", - "icon": { - "className": "rounded-sm", - "url": "https://placehold.co/100?text=St&font=roboto" - }, - "models": [], - "name": "Stealth", - "slug": "stealth" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "datacenters": [], - "displayName": "StreamLake", - "headquarters": "CN", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.streamlake.com/&size=256" - }, - "models": [ + }, { - "author": "kwaipilot", - "context_length": 256000, - "created_at": "2025-11-10T03:38:32.123517+00:00", + "author": "sourceful", + "context_length": 8192, + "created_at": "2025-12-08T23:50:36+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -141530,20 +138690,21 @@ }, "default_stops": [], "default_system": null, - "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. \n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", + "description": "Riverflow V2 Standard Preview is the standard variant of Sourceful's Riverflow V2 preview lineup. This preview version exceeds the performance of Riverflow 1 Family and is Sourceful's first unified text-to-image and image-to-image model family.\n\nPricing is $0.035 per output image, regardless of size.\n\nSourceful imposes a 4.5MB request size limit, therefore it is highly recommended to pass image URLs instead of Base64 data.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "SourcefulAdapter", "can_abort": false, - "context_length": 256000, + "context_length": 8192, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.streamlake.ai/document/DOC/mgkci47q13qr66h9i54", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.streamlake.ai/document/DOC/mgkchnd89grpt1961fw", + "privacyPolicyURL": "https://www.sourceful.com/legal/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.sourceful.com/legal/spring-terms-of-use", "training": false }, "features": { "supports_input_audio": false, + "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -141553,7 +138714,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "90e9244d-599a-43c1-a921-b5cfb235f8ef", + "id": "69bbbbea-d1e2-47ed-8d2a-ee2e9a329005", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -141562,13 +138723,13 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "kwaipilot", - "context_length": 262144, - "created_at": "2025-11-10T03:38:32.123517+00:00", + "author": "sourceful", + "context_length": 8192, + "created_at": "2025-12-08T23:50:36+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -141576,11 +138737,9 @@ }, "default_stops": [], "default_system": null, - "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. \n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", + "description": "Riverflow V2 Standard Preview is the standard variant of Sourceful's Riverflow V2 preview lineup. This preview version exceeds the performance of Riverflow 1 Family and is Sourceful's first unified text-to-image and image-to-image model family.\n\nPricing is $0.035 per output image, regardless of size.\n\nSourceful imposes a 4.5MB request size limit, therefore it is highly recommended to pass image URLs instead of Base64 data.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -141588,96 +138747,79 @@ } }, "group": "Other", - "has_text_output": true, + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Kwaipilot: KAT-Coder-Pro V1", - "output_modalities": ["text"], - "permaslug": "kwaipilot/kat-coder-pro-v1", + "name": "Sourceful: Riverflow V2 Standard Preview", + "output_modalities": ["image"], + "permaslug": "sourceful/riverflow-v2-standard-preview", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "KAT-Coder-Pro V1", - "slug": "kwaipilot/kat-coder-pro", - "updated_at": "2026-01-11T23:46:45.088471+00:00", - "warning_message": "The free endpoint has reduced rate limits from January 5-12, after which it will be deprecated. Use [kwaipilot/kat-coder-pro-v1](https://openrouter.ai/kwaipilot/kat-coder-pro) for continued access." + "short_name": "Riverflow V2 Standard Preview", + "slug": "sourceful/riverflow-v2-standard-preview", + "updated_at": "2026-02-07T03:46:03.42011+00:00", + "warning_message": null }, - "model_variant_permaslug": "kwaipilot/kat-coder-pro-v1", - "model_variant_slug": "kwaipilot/kat-coder-pro", + "model_variant_permaslug": "sourceful/riverflow-v2-standard-preview", + "model_variant_slug": "sourceful/riverflow-v2-standard-preview", "moderation_required": false, - "name": "StreamLake | kwaipilot/kat-coder-pro-v1", + "name": "Sourceful | sourceful/riverflow-v2-standard-preview", "pricing": { - "completion": "0.000000828", - "discount": 0.31, - "image": "0", - "image_output": "0", - "input_cache_read": "0.0000000414", - "internal_reasoning": "0", - "prompt": "0.000000207", - "request": "0", - "web_search": "0" + "completion": "0", + "discount": 0, + "image_output": "0.00000838323353293413", + "prompt": "0" }, - "provider_display_name": "StreamLake", + "provider_display_name": "Sourceful", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://vanchin.streamlake.ai/api/gateway/v1/endpoints", + "adapterName": "SourcefulAdapter", + "baseUrl": "https://design-api.sourceful.com/v1", "byokEnabled": false, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.streamlake.ai/document/DOC/mgkci47q13qr66h9i54", - "retainsPrompts": false, - "termsOfServiceURL": "https://www.streamlake.ai/document/DOC/mgkchnd89grpt1961fw", + "privacyPolicyURL": "https://www.sourceful.com/legal/privacy-policy", + "retainsPrompts": true, + "termsOfServiceURL": "https://www.sourceful.com/legal/spring-terms-of-use", "training": false }, - "displayName": "StreamLake", + "displayName": "Sourceful", "editors": [], "hasChatCompletions": true, "hasCompletions": false, - "headquarters": "CN", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.streamlake.com/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.sourceful.com&size=256" }, "ignoredProviderModels": [], "isAbortable": false, "isMultipartSupported": true, "moderationRequired": false, - "name": "StreamLake", + "name": "Sourceful", "owners": [], - "slug": "streamlake", + "slug": "sourceful", "statusPageUrl": null }, - "provider_model_id": "ep-kvn41v-1767582620105961754", - "provider_name": "StreamLake", + "provider_model_id": "sourceful/riverflow-v2-standard-preview", + "provider_name": "Sourceful", "provider_region": null, - "provider_slug": "streamlake/fp16", - "quantization": "fp16", - "supported_parameters": [ - "max_tokens", - "temperature", - "top_p", - "frequency_penalty", - "tool_choice", - "tools", - "structured_outputs", - "response_format" - ], + "provider_slug": "sourceful", + "quantization": "unknown", + "supported_parameters": [], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, @@ -141685,63 +138827,65 @@ } }, "group": "Other", - "has_text_output": true, + "has_text_output": false, "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Kwaipilot: KAT-Coder-Pro V1", - "output_modalities": ["text"], - "permaslug": "kwaipilot/kat-coder-pro-v1", + "name": "Sourceful: Riverflow V2 Standard Preview", + "output_modalities": ["image"], + "permaslug": "sourceful/riverflow-v2-standard-preview", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "KAT-Coder-Pro V1", - "slug": "kwaipilot/kat-coder-pro", - "updated_at": "2026-01-11T23:46:45.088471+00:00", - "warning_message": "The free endpoint has reduced rate limits from January 5-12, after which it will be deprecated. Use [kwaipilot/kat-coder-pro-v1](https://openrouter.ai/kwaipilot/kat-coder-pro) for continued access." + "short_name": "Riverflow V2 Standard Preview", + "slug": "sourceful/riverflow-v2-standard-preview", + "updated_at": "2026-02-07T03:46:03.42011+00:00", + "warning_message": null } ], - "name": "StreamLake", - "slug": "streamlake" + "name": "Sourceful", + "slug": "sourceful" }, { "dataPolicy": { "canPublish": false, "retainsPrompts": true, - "training": false + "training": true }, - "datacenters": ["US"], - "displayName": "Switchpoint", - "headquarters": "US", + "displayName": "Stealth", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://switchpoint.dev/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/Stealth.svg" }, "models": [ { - "author": "switchpoint", - "context_length": 131072, - "created_at": "2025-07-11T22:28:19+00:00", - "default_parameters": {}, + "author": "openrouter", + "context_length": 128000, + "created_at": "2026-02-09T04:27:05.486951+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library. \n\nAs the world of LLMs advances, our router gets smarter, ensuring you always benefit from the industry's newest models without changing your workflow.\n\nThis model is configured for a simple, flat rate per response here on OpenRouter. It's powered by the full routing engine from [Switchpoint AI](https://www.switchpoint.dev).", + "description": "This is a cloaked model provided to the community to gather feedback. A reasoning model designed for speed. It is built for coding assistants, real-time conversational applications, and agentic workflows.\n\nDefault reasoning effort is set to medium for fast responses. For agentic coding use cases, we recommend changing effort to high. \n\nNote: All prompts and completions for this model are logged by the provider and may be used to improve the model.", "endpoint": { - "adapter_name": "OpenAIAdapter", + "adapter_name": "StealthAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 128000, "data_policy": { "canPublish": false, "retainsPrompts": true, - "training": false + "training": true }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -141751,31 +138895,35 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f4ebfc98-c97d-4aa2-8ad0-3dbbbc137398", + "id": "9dcd058f-9135-4156-956c-98216bd3dd03", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 50, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 50000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "switchpoint", - "context_length": 131072, - "created_at": "2025-07-11T22:28:19+00:00", - "default_parameters": {}, + "author": "openrouter", + "context_length": 128000, + "created_at": "2026-02-09T04:27:05.486951+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library. \n\nAs the world of LLMs advances, our router gets smarter, ensuring you always benefit from the industry's newest models without changing your workflow.\n\nThis model is configured for a simple, flat rate per response here on OpenRouter. It's powered by the full routing engine from [Switchpoint AI](https://www.switchpoint.dev).", + "description": "This is a cloaked model provided to the community to gather feedback. A reasoning model designed for speed. It is built for coding assistants, real-time conversational applications, and agentic workflows.\n\nDefault reasoning effort is set to medium for fast responses. For agentic coding use cases, we recommend changing effort to high. \n\nNote: All prompts and completions for this model are logged by the provider and may be used to improve the model.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, "group": "Other", @@ -141786,87 +138934,87 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Switchpoint Router", + "name": "Aurora Alpha", "output_modalities": ["text"], - "permaslug": "switchpoint/router", + "permaslug": "openrouter/aurora-alpha", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "Switchpoint Router", - "slug": "switchpoint/router", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null + "short_name": "Aurora Alpha", + "slug": "openrouter/aurora-alpha", + "updated_at": "2026-02-09T17:13:05.11019+00:00", + "warning_message": "All prompts and completions for this model are logged by the provider and may be used to improve the model." }, - "model_variant_permaslug": "switchpoint/router", - "model_variant_slug": "switchpoint/router", + "model_variant_permaslug": "openrouter/aurora-alpha", + "model_variant_slug": "openrouter/aurora-alpha", "moderation_required": false, - "name": "Switchpoint | switchpoint/router", + "name": "Stealth | openrouter/aurora-alpha", "pricing": { - "completion": "0.0000034", + "completion": "0", "discount": 0, "image": "0", "image_output": "0", "internal_reasoning": "0", - "prompt": "0.00000085", + "prompt": "0", "request": "0", "web_search": "0" }, - "provider_display_name": "Switchpoint", + "provider_display_name": "Stealth", "provider_info": { - "adapterName": "OpenAIAdapter", - "baseUrl": "https://www.switchpoint.dev/v1", - "byokEnabled": true, + "adapterName": "StealthAdapter", + "baseUrl": " ", + "byokEnabled": false, "dataPolicy": { "canPublish": false, "retainsPrompts": true, - "training": false + "training": true }, - "displayName": "Switchpoint", - "editors": [], + "displayName": "Stealth", + "editors": ["{}"], "hasChatCompletions": true, "hasCompletions": true, - "headquarters": "US", "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://switchpoint.dev/&size=256" + "className": "invert-0 dark:invert", + "url": "/images/icons/Stealth.svg" }, - "ignoredProviderModels": ["openrouter/switchpoint-router"], + "ignoredProviderModels": [], "isAbortable": true, - "isMultipartSupported": false, + "isMultipartSupported": true, "moderationRequired": false, - "name": "Switchpoint", - "owners": [], - "slug": "switchpoint", + "name": "Stealth", + "owners": ["{}"], + "slug": "stealth", "statusPageUrl": null }, - "provider_model_id": "stable", - "provider_name": "Switchpoint", + "provider_model_id": "openrouter/aurora-alpha", + "provider_name": "Stealth", "provider_region": null, - "provider_slug": "switchpoint", + "provider_slug": "stealth", "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", "max_tokens", + "response_format", + "structured_outputs", + "reasoning_effort", "temperature", - "top_p", - "stop", - "top_k", - "seed" + "tools", + "tool_choice" ], - "supports_multipart": false, + "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null } }, "group": "Other", @@ -141877,33 +139025,910 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Switchpoint Router", + "name": "Aurora Alpha", "output_modalities": ["text"], - "permaslug": "switchpoint/router", + "permaslug": "openrouter/aurora-alpha", "reasoning_config": { "end_token": null, - "start_token": null, - "system_prompt": null + "start_token": null }, "router": null, - "short_name": "Switchpoint Router", - "slug": "switchpoint/router", - "updated_at": "2025-11-10T16:00:38.246665+00:00", - "warning_message": null - } - ], - "name": "Switchpoint", - "slug": "switchpoint" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Together", - "headquarters": "US", - "icon": { + "short_name": "Aurora Alpha", + "slug": "openrouter/aurora-alpha", + "updated_at": "2026-02-09T17:13:05.11019+00:00", + "warning_message": "All prompts and completions for this model are logged by the provider and may be used to improve the model." + }, + { + "author": "Other", + "context_length": 256000, + "created_at": "2025-08-26T20:08:47.000Z", + "default_parameters": null, + "default_stops": [], + "default_system": null, + "description": "Giga Potato is a stealth model deeply optimized for agentic programming, with visual understanding capability. It is provided free of charge in Kilo Code for a limited time.\n**Note:** Prompts and completions are logged and may be used to improve the model.", + "endpoint": { + "adapter_name": "other", + "can_abort": true, + "context_length": 256000, + "data_policy": { + "canPublish": false, + "retainsPrompts": true, + "training": true + }, + "features": null, + "has_chat_completions": true, + "has_completions": false, + "id": "giga-potato", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 32000, + "max_prompt_images": null, + "max_prompt_tokens": 256000, + "max_tokens_per_image": null, + "model": { + "author": "Other", + "context_length": 256000, + "created_at": "2025-08-26T20:08:47.000Z", + "default_parameters": null, + "default_stops": [], + "default_system": null, + "description": "Giga Potato is a stealth model deeply optimized for agentic programming, with visual understanding capability. It is provided free of charge in Kilo Code for a limited time.\n**Note:** Prompts and completions are logged and may be used to improve the model.", + "features": null, + "group": "other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Giga Potato (free)", + "output_modalities": ["text"], + "permaslug": "giga-potato", + "reasoning_config": null, + "router": null, + "short_name": "Giga Potato (free)", + "slug": "giga-potato", + "updated_at": "2026-02-10T10:49:03.904Z", + "warning_message": null + }, + "model_variant_permaslug": "giga-potato", + "model_variant_slug": "giga-potato", + "moderation_required": false, + "name": "Giga Potato (free)", + "pricing": { + "completion": "0.0000000", + "discount": 0, + "image": "0", + "image_output": "0", + "input_cache_read": "0.00000000", + "internal_reasoning": "0", + "prompt": "0.0000000", + "request": "0", + "web_search": "0" + }, + "provider_display_name": "Other", + "provider_info": { + "adapterName": "other", + "baseUrl": "https://kilo.ai", + "byokEnabled": false, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": true + }, + "displayName": "Other", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": false, + "headquarters": "Unknown", + "icon": { + "className": "rounded-sm", + "url": "https://via.placeholder.com/32x32/000000/FFFFFF?text=S" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Other", + "owners": [], + "slug": "other", + "statusPageUrl": null + }, + "provider_model_id": "giga-potato", + "provider_name": "Other", + "provider_region": null, + "provider_slug": "other", + "quantization": null, + "supported_parameters": [ + "max_tokens", + "temperature", + "tools", + "reasoning", + "include_reasoning" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "default" + }, + "features": null, + "group": "other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Giga Potato (free)", + "output_modalities": ["text"], + "permaslug": "giga-potato", + "reasoning_config": null, + "router": null, + "short_name": "Giga Potato (free)", + "slug": "giga-potato", + "updated_at": "2026-02-10T10:49:03.904Z", + "warning_message": null + }, + { + "author": "openrouter", + "context_length": 200000, + "created_at": "2026-02-06T16:04:15.762765+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], + "default_system": null, + "description": "Pony is a cutting-edge foundation model with strong performance in coding, agentic workflows, reasoning, and roleplay, making it well suited for hands-on coding and real-world use.\n\n**Note:** All prompts and completions for this model are logged by the provider and may be used to improve the model.", + "endpoint": { + "adapter_name": "StealthAdapter", + "can_abort": true, + "context_length": 200000, + "data_policy": { + "canPublish": false, + "retainsPrompts": true, + "training": true + }, + "features": { + "reasoning_return_mechanism": "reasoning-content", + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "c965370e-6a28-498d-8399-b95ed0ddfd77", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": 100, + "limit_rpm_cf": null, + "max_completion_tokens": 131000, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "openrouter", + "context_length": 200000, + "created_at": "2026-02-06T16:04:15.762765+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, + "default_stops": [], + "default_system": null, + "description": "Pony is a cutting-edge foundation model with strong performance in coding, agentic workflows, reasoning, and roleplay, making it well suited for hands-on coding and real-world use.\n\n**Note:** All prompts and completions for this model are logged by the provider and may be used to improve the model.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Pony Alpha", + "output_modalities": ["text"], + "permaslug": "openrouter/pony-alpha", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Pony Alpha", + "slug": "openrouter/pony-alpha", + "updated_at": "2026-02-06T17:43:31.804627+00:00", + "warning_message": "All prompts and completions for this model are logged by the provider and may be used to improve the model." + }, + "model_variant_permaslug": "openrouter/pony-alpha", + "model_variant_slug": "openrouter/pony-alpha", + "moderation_required": false, + "name": "Stealth | openrouter/pony-alpha", + "pricing": { + "completion": "0", + "discount": 0, + "image": "0", + "image_output": "0", + "internal_reasoning": "0", + "prompt": "0", + "request": "0", + "web_search": "0" + }, + "provider_display_name": "Stealth", + "provider_info": { + "adapterName": "StealthAdapter", + "baseUrl": " ", + "byokEnabled": false, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": true + }, + "displayName": "Stealth", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": true, + "icon": { + "className": "invert-0 dark:invert", + "url": "/images/icons/Stealth.svg" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Stealth", + "owners": ["{}"], + "slug": "stealth", + "statusPageUrl": null + }, + "provider_model_id": "openrouter/pony-alpha", + "provider_name": "Stealth", + "provider_region": null, + "provider_slug": "stealth", + "quantization": "unknown", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "response_format", + "structured_outputs", + "tools", + "temperature", + "top_p" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Pony Alpha", + "output_modalities": ["text"], + "permaslug": "openrouter/pony-alpha", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Pony Alpha", + "slug": "openrouter/pony-alpha", + "updated_at": "2026-02-06T17:43:31.804627+00:00", + "warning_message": "All prompts and completions for this model are logged by the provider and may be used to improve the model." + } + ], + "name": "Stealth", + "slug": "stealth" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "datacenters": [], + "displayName": "StepFun", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://stepfun.ai&size=256" + }, + "models": [ + { + "author": "stepfun", + "context_length": 256000, + "created_at": "2026-01-29T23:12:17.060093+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token. It is a reasoning model that is incredibly speed efficient even at long contexts.", + "endpoint": { + "adapter_name": "OpenAIAdapter", + "can_abort": true, + "context_length": 256000, + "data_policy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "features": { + "disable_free_endpoint_limits": true, + "reasoning_return_mechanism": "reasoning-content", + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "8a9f6e11-a2df-4ac5-90c5-e6c7ef96955c", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": 50, + "limit_rpm_cf": null, + "max_completion_tokens": 256000, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "stepfun", + "context_length": 256000, + "created_at": "2026-01-29T23:12:17.060093+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token. It is a reasoning model that is incredibly speed efficient even at long contexts.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "stepfun-ai/Step-3.5-Flash", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "StepFun: Step 3.5 Flash", + "output_modalities": ["text"], + "permaslug": "stepfun/step-3.5-flash", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Step 3.5 Flash", + "slug": "stepfun/step-3.5-flash", + "updated_at": "2026-02-02T22:21:45.984731+00:00", + "warning_message": null + }, + "model_variant_permaslug": "stepfun/step-3.5-flash:free", + "model_variant_slug": "stepfun/step-3.5-flash:free", + "moderation_required": false, + "name": "StepFun | stepfun/step-3.5-flash:free", + "pricing": { + "completion": "0", + "discount": 0, + "prompt": "0" + }, + "provider_display_name": "StepFun", + "provider_info": { + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.stepfun.ai/v1", + "byokEnabled": false, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "displayName": "StepFun", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": true, + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://stepfun.ai&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "StepFun", + "owners": ["org_38x7nyArKRacEJKcYmSSuKJ3Ba3"], + "slug": "stepfun", + "statusPageUrl": null + }, + "provider_model_id": "step-3.5-flash", + "provider_name": "StepFun", + "provider_region": null, + "provider_slug": "stepfun/fp8", + "quantization": "fp8", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "temperature", + "max_tokens", + "top_p", + "stop", + "tools", + "frequency_penalty" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "free" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "stepfun-ai/Step-3.5-Flash", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "StepFun: Step 3.5 Flash (free)", + "output_modalities": ["text"], + "permaslug": "stepfun/step-3.5-flash", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Step 3.5 Flash (free)", + "slug": "stepfun/step-3.5-flash", + "updated_at": "2026-02-02T22:21:45.984731+00:00", + "warning_message": null + } + ], + "name": "StepFun", + "slug": "stepfun" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "datacenters": [], + "displayName": "StreamLake", + "headquarters": "CN", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.streamlake.com/&size=256" + }, + "models": [ + { + "author": "kwaipilot", + "context_length": 256000, + "created_at": "2025-11-10T03:38:32.123517+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. \n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", + "endpoint": { + "adapter_name": "OpenAIAdapter", + "can_abort": false, + "context_length": 256000, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://www.streamlake.ai/document/DOC/mgkci47q13qr66h9i54", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.streamlake.ai/document/DOC/mgkchnd89grpt1961fw", + "training": false + }, + "features": { + "supports_input_audio": false, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "90e9244d-599a-43c1-a921-b5cfb235f8ef", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "kwaipilot", + "context_length": 262144, + "created_at": "2025-11-10T03:38:32.123517+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KAT-Coder series. Designed specifically for agentic coding tasks, it excels in real-world software engineering scenarios, achieving 73.4% solve rate on the SWE-Bench Verified benchmark. \n\nThe model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization, and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning (SFT), reinforcement fine-tuning (RFT), and scalable agentic RL.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Kwaipilot: KAT-Coder-Pro V1", + "output_modalities": ["text"], + "permaslug": "kwaipilot/kat-coder-pro-v1", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, + "router": null, + "short_name": "KAT-Coder-Pro V1", + "slug": "kwaipilot/kat-coder-pro", + "updated_at": "2026-01-11T23:46:45.088471+00:00", + "warning_message": "The free endpoint has reduced rate limits from January 5-12, after which it will be deprecated. Use [kwaipilot/kat-coder-pro-v1](https://openrouter.ai/kwaipilot/kat-coder-pro) for continued access." + }, + "model_variant_permaslug": "kwaipilot/kat-coder-pro-v1", + "model_variant_slug": "kwaipilot/kat-coder-pro", + "moderation_required": false, + "name": "StreamLake | kwaipilot/kat-coder-pro-v1", + "pricing": { + "completion": "0.000000828", + "discount": 0.31, + "input_cache_read": "0.0000000414", + "prompt": "0.000000207" + }, + "provider_display_name": "StreamLake", + "provider_info": { + "adapterName": "OpenAIAdapter", + "baseUrl": "https://vanchin.streamlake.ai/api/gateway/v1/endpoints", + "byokEnabled": false, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://www.streamlake.ai/document/DOC/mgkci47q13qr66h9i54", + "retainsPrompts": false, + "termsOfServiceURL": "https://www.streamlake.ai/document/DOC/mgkchnd89grpt1961fw", + "training": false + }, + "displayName": "StreamLake", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": false, + "headquarters": "CN", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.streamlake.com/&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "StreamLake", + "owners": [], + "slug": "streamlake", + "statusPageUrl": null + }, + "provider_model_id": "ep-kvn41v-1767582620105961754", + "provider_name": "StreamLake", + "provider_region": null, + "provider_slug": "streamlake/fp16", + "quantization": "fp16", + "supported_parameters": [ + "max_tokens", + "temperature", + "top_p", + "frequency_penalty", + "tool_choice", + "tools", + "structured_outputs", + "response_format" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Kwaipilot: KAT-Coder-Pro V1", + "output_modalities": ["text"], + "permaslug": "kwaipilot/kat-coder-pro-v1", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, + "router": null, + "short_name": "KAT-Coder-Pro V1", + "slug": "kwaipilot/kat-coder-pro", + "updated_at": "2026-01-11T23:46:45.088471+00:00", + "warning_message": "The free endpoint has reduced rate limits from January 5-12, after which it will be deprecated. Use [kwaipilot/kat-coder-pro-v1](https://openrouter.ai/kwaipilot/kat-coder-pro) for continued access." + } + ], + "name": "StreamLake", + "slug": "streamlake" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "datacenters": ["US"], + "displayName": "Switchpoint", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://switchpoint.dev/&size=256" + }, + "models": [ + { + "author": "switchpoint", + "context_length": 131072, + "created_at": "2025-07-11T22:28:19+00:00", + "default_parameters": {}, + "default_stops": [], + "default_system": null, + "description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library. \n\nAs the world of LLMs advances, our router gets smarter, ensuring you always benefit from the industry's newest models without changing your workflow.\n\nThis model is configured for a simple, flat rate per response here on OpenRouter. It's powered by the full routing engine from [Switchpoint AI](https://www.switchpoint.dev).", + "endpoint": { + "adapter_name": "OpenAIAdapter", + "can_abort": true, + "context_length": 131072, + "data_policy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "features": { + "supported_parameters": {}, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "f4ebfc98-c97d-4aa2-8ad0-3dbbbc137398", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "switchpoint", + "context_length": 131072, + "created_at": "2025-07-11T22:28:19+00:00", + "default_parameters": {}, + "default_stops": [], + "default_system": null, + "description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library. \n\nAs the world of LLMs advances, our router gets smarter, ensuring you always benefit from the industry's newest models without changing your workflow.\n\nThis model is configured for a simple, flat rate per response here on OpenRouter. It's powered by the full routing engine from [Switchpoint AI](https://www.switchpoint.dev).", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Switchpoint Router", + "output_modalities": ["text"], + "permaslug": "switchpoint/router", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, + "router": null, + "short_name": "Switchpoint Router", + "slug": "switchpoint/router", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "switchpoint/router", + "model_variant_slug": "switchpoint/router", + "moderation_required": false, + "name": "Switchpoint | switchpoint/router", + "pricing": { + "completion": "0.0000034", + "discount": 0, + "prompt": "0.00000085" + }, + "provider_display_name": "Switchpoint", + "provider_info": { + "adapterName": "OpenAIAdapter", + "baseUrl": "https://www.switchpoint.dev/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "displayName": "Switchpoint", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://switchpoint.dev/&size=256" + }, + "ignoredProviderModels": ["openrouter/switchpoint-router"], + "isAbortable": true, + "isMultipartSupported": false, + "moderationRequired": false, + "name": "Switchpoint", + "owners": [], + "slug": "switchpoint", + "statusPageUrl": null + }, + "provider_model_id": "stable", + "provider_name": "Switchpoint", + "provider_region": null, + "provider_slug": "switchpoint", + "quantization": "unknown", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "stop", + "top_k", + "seed" + ], + "supports_multipart": false, + "supports_reasoning": true, + "supports_tool_parameters": false, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Switchpoint Router", + "output_modalities": ["text"], + "permaslug": "switchpoint/router", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, + "router": null, + "short_name": "Switchpoint Router", + "slug": "switchpoint/router", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + } + ], + "name": "Switchpoint", + "slug": "switchpoint" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Together", + "headquarters": "US", + "icon": { "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.together.ai/&size=256" }, "models": [ @@ -141983,12 +140008,7 @@ "pricing": { "completion": "0.0000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, "provider_display_name": "Together", "provider_info": { @@ -142141,7 +140161,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -142269,12 +140290,7 @@ "pricing": { "completion": "0.0000033", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000009", - "request": "0", - "web_search": "0" + "prompt": "0.0000009" }, "provider_display_name": "Together", "provider_info": { @@ -142427,7 +140443,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -142555,12 +140572,7 @@ "pricing": { "completion": "0.00000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000018", - "request": "0", - "web_search": "0" + "prompt": "0.00000018" }, "provider_display_name": "Together", "provider_info": { @@ -142713,7 +140725,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -142861,12 +140874,7 @@ "pricing": { "completion": "0.00000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000000045", - "request": "0", - "web_search": "0" + "prompt": "0.000000045" }, "provider_display_name": "Together", "provider_info": { @@ -143019,7 +141027,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -143164,12 +141173,7 @@ "pricing": { "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000075", - "request": "0", - "web_search": "0" + "prompt": "0.00000075" }, "provider_display_name": "Together", "provider_info": { @@ -143322,7 +141326,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -143378,16 +141383,20 @@ }, { "author": "deepcogito", - "context_length": 32767, - "created_at": "2025-09-02T16:46:08.407814+00:00", - "default_parameters": {}, + "context_length": 128000, + "created_at": "2025-11-13T22:00:33.034408+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "An instruction-tuned, hybrid-reasoning Mixture-of-Experts model built on Llama-4-Scout-17B-16E. Cogito v2 can answer directly or engage an extended “thinking” phase, with alignment guided by Iterated Distillation & Amplification (IDA). It targets coding, STEM, instruction following, and general helpfulness, with stronger multilingual, tool-calling, and reasoning performance than size-equivalent baselines. The model supports long-context use (up to 10M tokens) and standard Transformers workflows. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. This model is trained using self play with reinforcement learning to reach state-of-the-art performance on multiple categories (instruction following, coding, longer queries and creative writing). This advanced system demonstrates significant progress toward scalable superintelligence through policy improvement.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32767, + "context_length": 128000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -143396,6 +141405,7 @@ "training": false }, "features": { + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -143405,7 +141415,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "349f6bd6-3722-4ffd-a1af-cee16a5d6784", + "id": "3ab8816e-40d5-4ad5-95f4-755cc0f0a8f8", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -143419,54 +141429,54 @@ "max_tokens_per_image": null, "model": { "author": "deepcogito", - "context_length": 131072, - "created_at": "2025-09-02T16:46:08.407814+00:00", - "default_parameters": {}, + "context_length": 128000, + "created_at": "2025-11-13T22:00:33.034408+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "An instruction-tuned, hybrid-reasoning Mixture-of-Experts model built on Llama-4-Scout-17B-16E. Cogito v2 can answer directly or engage an extended “thinking” phase, with alignment guided by Iterated Distillation & Amplification (IDA). It targets coding, STEM, instruction following, and general helpfulness, with stronger multilingual, tool-calling, and reasoning performance than size-equivalent baselines. The model supports long-context use (up to 10M tokens) and standard Transformers workflows. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. This model is trained using self play with reinforcement learning to reach state-of-the-art performance on multiple categories (instruction following, coding, longer queries and creative writing). This advanced system demonstrates significant progress toward scalable superintelligence through policy improvement.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "Enable deep thinking subroutine." + "system_prompt": null } }, - "group": "Llama4", + "group": "Other", "has_text_output": true, - "hf_slug": "deepcogito/cogito-v2-preview-llama-109B-MoE", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Cogito V2 Preview Llama 109B", + "name": "Deep Cogito: Cogito v2.1 671B", "output_modalities": ["text"], - "permaslug": "deepcogito/cogito-v2-preview-llama-109b-moe", + "permaslug": "deepcogito/cogito-v2.1-671b-20251118", "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "Enable deep thinking subroutine." + "system_prompt": null }, "router": null, - "short_name": "Cogito V2 Preview Llama 109B", - "slug": "deepcogito/cogito-v2-preview-llama-109b-moe", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Cogito v2.1 671B", + "slug": "deepcogito/cogito-v2.1-671b", + "updated_at": "2025-11-19T16:54:44.087083+00:00", "warning_message": null }, - "model_variant_permaslug": "deepcogito/cogito-v2-preview-llama-109b-moe", - "model_variant_slug": "deepcogito/cogito-v2-preview-llama-109b-moe", + "model_variant_permaslug": "deepcogito/cogito-v2.1-671b-20251118", + "model_variant_slug": "deepcogito/cogito-v2.1-671b", "moderation_required": false, - "name": "Together | deepcogito/cogito-v2-preview-llama-109b-moe", + "name": "Together | deepcogito/cogito-v2.1-671b-20251118", "pricing": { - "completion": "0.00000059", + "completion": "0.00000125", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000018", - "request": "0", - "web_search": "0" + "prompt": "0.00000125" }, "provider_display_name": "Together", "provider_info": { @@ -143619,7 +141629,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -143629,7 +141640,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "deepcogito/cogito-v2-preview-llama-109B-MoE", + "provider_model_id": "deepcogito/cogito-v2-1-671b", "provider_name": "Together", "provider_region": null, "provider_slug": "together", @@ -143647,60 +141658,61 @@ "repetition_penalty", "logit_bias", "min_p", - "tools", - "tool_choice" + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "Enable deep thinking subroutine." + "system_prompt": null } }, - "group": "Llama4", + "group": "Other", "has_text_output": true, - "hf_slug": "deepcogito/cogito-v2-preview-llama-109B-MoE", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Cogito V2 Preview Llama 109B", + "name": "Deep Cogito: Cogito v2.1 671B", "output_modalities": ["text"], - "permaslug": "deepcogito/cogito-v2-preview-llama-109b-moe", + "permaslug": "deepcogito/cogito-v2.1-671b-20251118", "reasoning_config": { "end_token": "", "start_token": "", - "system_prompt": "Enable deep thinking subroutine." + "system_prompt": null }, "router": null, - "short_name": "Cogito V2 Preview Llama 109B", - "slug": "deepcogito/cogito-v2-preview-llama-109b-moe", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Cogito v2.1 671B", + "slug": "deepcogito/cogito-v2.1-671b", + "updated_at": "2025-11-19T16:54:44.087083+00:00", "warning_message": null }, { - "author": "deepcogito", - "context_length": 32768, - "created_at": "2025-10-17T14:05:33.76679+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Cogito v2 405B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection. It represents a significant step toward frontier intelligence with dense architecture delivering performance competitive with leading closed models. This advanced reasoning system combines policy improvement with massive scale for exceptional capabilities.\n", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 163840, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -143709,11 +141721,7 @@ "training": false }, "features": { - "is_mandatory_reasoning": false, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -143723,7 +141731,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cfe62eed-07fb-4988-8495-d4b8e6892fa0", + "id": "1203e612-3ca7-4f78-9ceb-dc6ac64ec069", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -143732,63 +141740,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepcogito", - "context_length": 131072, - "created_at": "2025-10-17T14:05:33.76679+00:00", + "author": "deepseek", + "context_length": 163840, + "created_at": "2025-05-28T17:59:30.833128+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Cogito v2 405B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection. It represents a significant step toward frontier intelligence with dense architecture delivering performance competitive with leading closed models. This advanced reasoning system combines policy improvement with massive scale for exceptional capabilities.\n", + "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Llama3", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepcogito/cogito-v2-preview-llama-405B", + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Deep Cogito: Cogito V2 Preview Llama 405B", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "deepcogito/cogito-v2-preview-llama-405b", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Cogito V2 Preview Llama 405B", - "slug": "deepcogito/cogito-v2-preview-llama-405b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, - "model_variant_permaslug": "deepcogito/cogito-v2-preview-llama-405b", - "model_variant_slug": "deepcogito/cogito-v2-preview-llama-405b", + "model_variant_permaslug": "deepseek/deepseek-r1-0528", + "model_variant_slug": "deepseek/deepseek-r1-0528", "moderation_required": false, - "name": "Together | deepcogito/cogito-v2-preview-llama-405b", + "name": "Together | deepseek/deepseek-r1-0528", "pricing": { - "completion": "0.0000035", + "completion": "0.000007", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000035", - "request": "0", - "web_search": "0" + "prompt": "0.000003" }, "provider_display_name": "Together", "provider_info": { @@ -143941,7 +141945,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -143951,16 +141956,14 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "deepcogito/cogito-v2-preview-llama-405B", + "provider_model_id": "deepseek-ai/DeepSeek-R1", "provider_name": "Together", "provider_region": null, - "provider_slug": "together", - "quantization": "unknown", + "provider_slug": "together/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -143970,61 +141973,56 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null } }, - "group": "Llama3", + "group": "DeepSeek", "has_text_output": true, - "hf_slug": "deepcogito/cogito-v2-preview-llama-405B", + "hf_slug": "deepseek-ai/DeepSeek-R1-0528", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "deepseek-r1", "model_version_group_id": null, - "name": "Deep Cogito: Cogito V2 Preview Llama 405B", + "name": "DeepSeek: R1 0528", "output_modalities": ["text"], - "permaslug": "deepcogito/cogito-v2-preview-llama-405b", + "permaslug": "deepseek/deepseek-r1-0528", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Cogito V2 Preview Llama 405B", - "slug": "deepcogito/cogito-v2-preview-llama-405b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "R1 0528", + "slug": "deepseek/deepseek-r1-0528", + "updated_at": "2026-01-08T20:10:31.314892+00:00", "warning_message": null }, { - "author": "deepcogito", - "context_length": 32768, - "created_at": "2025-09-02T16:49:44.153462+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-01-23T20:12:49.780212+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Cogito v2 70B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection. Built with iterative policy improvement, it delivers strong performance across reasoning tasks while maintaining efficiency through shorter reasoning chains and improved intuition.", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -144033,11 +142031,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": false, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -144047,7 +142040,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0a2d8122-db47-42a6-b7cd-4d3646ee32a6", + "id": "c4f1a7bc-f6b6-4607-9b71-4804c2e5a5f8", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -144056,63 +142049,52 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 32768, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepcogito", - "context_length": 131072, - "created_at": "2025-09-02T16:49:44.153462+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "deepseek", + "context_length": 128000, + "created_at": "2025-01-23T20:12:49.780212+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], "default_system": null, - "description": "Cogito v2 70B is a dense hybrid reasoning model that combines direct answering capabilities with advanced self-reflection. Built with iterative policy improvement, it delivers strong performance across reasoning tasks while maintaining efficiency through shorter reasoning chains and improved intuition.", + "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", "features": { "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": "Enable deep thinking subroutine." + "start_token": "" } }, "group": "Llama3", "has_text_output": true, - "hf_slug": "deepcogito/cogito-v2-preview-llama-70B", + "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Deep Cogito: Cogito V2 Preview Llama 70B", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1 Distill Llama 70B", "output_modalities": ["text"], - "permaslug": "deepcogito/cogito-v2-preview-llama-70b", + "permaslug": "deepseek/deepseek-r1-distill-llama-70b", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": "Enable deep thinking subroutine." + "start_token": "" }, "router": null, - "short_name": "Cogito V2 Preview Llama 70B", - "slug": "deepcogito/cogito-v2-preview-llama-70b", + "short_name": "R1 Distill Llama 70B", + "slug": "deepseek/deepseek-r1-distill-llama-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepcogito/cogito-v2-preview-llama-70b", - "model_variant_slug": "deepcogito/cogito-v2-preview-llama-70b", + "model_variant_permaslug": "deepseek/deepseek-r1-distill-llama-70b", + "model_variant_slug": "deepseek/deepseek-r1-distill-llama-70b", "moderation_required": false, - "name": "Together | deepcogito/cogito-v2-preview-llama-70b", + "name": "Together | deepseek/deepseek-r1-distill-llama-70b", "pricing": { - "completion": "0.00000088", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000088", - "request": "0", - "web_search": "0" + "prompt": "0.000002" }, "provider_display_name": "Together", "provider_info": { @@ -144265,7 +142247,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -144275,7 +142258,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "deepcogito/cogito-v2-preview-llama-70B", + "provider_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "provider_name": "Together", "provider_region": null, "provider_slug": "together", @@ -144283,8 +142266,6 @@ "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -144294,49 +142275,45 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": "Enable deep thinking subroutine." + "start_token": "" } }, "group": "Llama3", "has_text_output": true, - "hf_slug": "deepcogito/cogito-v2-preview-llama-70B", + "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Deep Cogito: Cogito V2 Preview Llama 70B", + "instruct_type": "deepseek-r1", + "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", + "name": "DeepSeek: R1 Distill Llama 70B", "output_modalities": ["text"], - "permaslug": "deepcogito/cogito-v2-preview-llama-70b", + "permaslug": "deepseek/deepseek-r1-distill-llama-70b", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": "Enable deep thinking subroutine." + "start_token": "" }, "router": null, - "short_name": "Cogito V2 Preview Llama 70B", - "slug": "deepcogito/cogito-v2-preview-llama-70b", + "short_name": "R1 Distill Llama 70B", + "slug": "deepseek/deepseek-r1-distill-llama-70b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepcogito", - "context_length": 128000, - "created_at": "2025-11-13T22:00:33.034408+00:00", + "author": "essentialai", + "context_length": 32768, + "created_at": "2025-12-07T08:07:27.970616+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -144344,11 +142321,11 @@ }, "default_stops": [], "default_system": null, - "description": "Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. This model is trained using self play with reinforcement learning to reach state-of-the-art performance on multiple categories (instruction following, coding, longer queries and creative writing). This advanced system demonstrates significant progress toward scalable superintelligence through policy improvement.", + "description": "Rnj-1 is an 8B-parameter, dense, open-weight model family developed by Essential AI and trained from scratch with a focus on programming, math, and scientific reasoning. The model demonstrates strong performance across multiple programming languages, tool-use workflows, and agentic execution environments (e.g., mini-SWE-agent). ", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -144357,7 +142334,6 @@ "training": false }, "features": { - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -144367,7 +142343,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "3ab8816e-40d5-4ad5-95f4-755cc0f0a8f8", + "id": "1d975e42-8769-43db-a998-a227a01985d5", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -144380,9 +142356,9 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepcogito", - "context_length": 128000, - "created_at": "2025-11-13T22:00:33.034408+00:00", + "author": "essentialai", + "context_length": 32768, + "created_at": "2025-12-07T08:07:27.970616+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, @@ -144390,50 +142366,45 @@ }, "default_stops": [], "default_system": null, - "description": "Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. This model is trained using self play with reinforcement learning to reach state-of-the-art performance on multiple categories (instruction following, coding, longer queries and creative writing). This advanced system demonstrates significant progress toward scalable superintelligence through policy improvement.", + "description": "Rnj-1 is an 8B-parameter, dense, open-weight model family developed by Essential AI and trained from scratch with a focus on programming, math, and scientific reasoning. The model demonstrates strong performance across multiple programming languages, tool-use workflows, and agentic execution environments (e.g., mini-SWE-agent). ", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "EssentialAI/rnj-1-instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Deep Cogito: Cogito v2.1 671B", + "name": "EssentialAI: Rnj 1 Instruct", "output_modalities": ["text"], - "permaslug": "deepcogito/cogito-v2.1-671b-20251118", + "permaslug": "essentialai/rnj-1-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Cogito v2.1 671B", - "slug": "deepcogito/cogito-v2.1-671b", - "updated_at": "2025-11-19T16:54:44.087083+00:00", + "short_name": "Rnj 1 Instruct", + "slug": "essentialai/rnj-1-instruct", + "updated_at": "2025-12-08T15:30:33.048521+00:00", "warning_message": null }, - "model_variant_permaslug": "deepcogito/cogito-v2.1-671b-20251118", - "model_variant_slug": "deepcogito/cogito-v2.1-671b", + "model_variant_permaslug": "essentialai/rnj-1-instruct", + "model_variant_slug": "essentialai/rnj-1-instruct", "moderation_required": false, - "name": "Together | deepcogito/cogito-v2.1-671b-20251118", + "name": "Together | essentialai/rnj-1-instruct", "pricing": { - "completion": "0.00000125", + "completion": "0.00000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Together", "provider_info": { @@ -144586,7 +142557,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -144596,14 +142568,12 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "deepcogito/cogito-v2-1-671b", + "provider_model_id": "essentialai/rnj-1-instruct", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -144618,7 +142588,7 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" @@ -144626,45 +142596,45 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "EssentialAI/rnj-1-instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Deep Cogito: Cogito v2.1 671B", + "name": "EssentialAI: Rnj 1 Instruct", "output_modalities": ["text"], - "permaslug": "deepcogito/cogito-v2.1-671b-20251118", + "permaslug": "essentialai/rnj-1-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Cogito v2.1 671B", - "slug": "deepcogito/cogito-v2.1-671b", - "updated_at": "2025-11-19T16:54:44.087083+00:00", + "short_name": "Rnj 1 Instruct", + "slug": "essentialai/rnj-1-instruct", + "updated_at": "2025-12-08T15:30:33.048521+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "author": "google", + "context_length": 32768, + "created_at": "2025-05-20T21:33:44.157973+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. [Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -144673,7 +142643,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -144683,7 +142652,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "e650c2fa-e13e-4744-a8d5-b4745714c701", + "id": "2fbebc5f-9439-4d6d-af69-0c7aec1c4392", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -144692,49 +142661,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 12288, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "author": "google", + "context_length": 32000, + "created_at": "2025-05-20T21:33:44.157973+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", - "features": {}, - "group": "DeepSeek", + "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. [Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "google/gemma-3n-E4B-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "model_version_group_id": null, + "name": "Google: Gemma 3n 4B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", - "reasoning_config": null, + "permaslug": "google/gemma-3n-e4b-it", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Gemma 3n 4B", + "slug": "google/gemma-3n-e4b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", - "model_variant_slug": "deepseek/deepseek-chat-v3-0324", + "model_variant_permaslug": "google/gemma-3n-e4b-it", + "model_variant_slug": "google/gemma-3n-e4b-it", "moderation_required": false, - "name": "Together | deepseek/deepseek-chat-v3-0324", + "name": "Together | google/gemma-3n-e4b-it", "pricing": { - "completion": "0.00000125", + "completion": "0.00000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000125", - "request": "0", - "web_search": "0" + "prompt": "0.00000002" }, "provider_display_name": "Together", "provider_info": { @@ -144887,7 +142861,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -144897,11 +142872,11 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "deepseek-ai/DeepSeek-V3", + "provider_model_id": "google/gemma-3n-E4B-it", "provider_name": "Together", "provider_region": null, - "provider_slug": "together/fp8", - "quantization": "fp8", + "provider_slug": "together", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -144912,51 +142887,55 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "DeepSeek", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "google/gemma-3n-E4B-it", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "model_version_group_id": null, + "name": "Google: Gemma 3n 4B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", - "reasoning_config": null, + "permaslug": "google/gemma-3n-e4b-it", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Gemma 3n 4B", + "slug": "google/gemma-3n-e4b-it", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "meta-llama", + "context_length": 8192, + "created_at": "2024-04-18T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 163840, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -144975,7 +142954,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "1203e612-3ca7-4f78-9ceb-dc6ac64ec069", + "id": "716c6edf-8c41-4cc3-91d4-66655971bd2e", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -144984,64 +142963,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 163840, - "created_at": "2025-05-28T17:59:30.833128+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "author": "meta-llama", + "context_length": 8192, + "created_at": "2024-04-18T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3 8B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Llama 3 8B Instruct", + "slug": "meta-llama/llama-3-8b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-0528", - "model_variant_slug": "deepseek/deepseek-r1-0528", + "model_variant_permaslug": "meta-llama/llama-3-8b-instruct", + "model_variant_slug": "meta-llama/llama-3-8b-instruct", "moderation_required": false, - "name": "Together | deepseek/deepseek-r1-0528", + "name": "Together | meta-llama/llama-3-8b-instruct", "pricing": { - "completion": "0.000007", + "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000003", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Together", "provider_info": { @@ -145194,7 +143153,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -145204,14 +143164,12 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "deepseek-ai/DeepSeek-R1", + "provider_model_id": "meta-llama/Meta-Llama-3-8B-Instruct-Lite", "provider_name": "Together", "provider_region": null, - "provider_slug": "together/fp8", - "quantization": "fp8", + "provider_slug": "together/int4", + "quantization": "int4", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -145224,49 +143182,38 @@ "min_p" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "DeepSeek", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-0528", + "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": null, - "name": "DeepSeek: R1 0528", + "instruct_type": "llama3", + "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", + "name": "Meta: Llama 3 8B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-0528", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3-8b-instruct", + "reasoning_config": null, "router": null, - "short_name": "R1 0528", - "slug": "deepseek/deepseek-r1-0528", - "updated_at": "2026-01-08T20:10:31.314892+00:00", + "short_name": "Llama 3 8B Instruct", + "slug": "meta-llama/llama-3-8b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", + "author": "meta-llama", "context_length": 131072, - "created_at": "2025-01-23T20:12:49.780212+00:00", + "created_at": "2024-07-23T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, @@ -145288,7 +143235,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c4f1a7bc-f6b6-4607-9b71-4804c2e5a5f8", + "id": "d71a12bc-4c11-42b2-adbb-a23de627c9ff", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -145297,57 +143244,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 32768, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 128000, - "created_at": "2025-01-23T20:12:49.780212+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-07-23T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across multiple benchmarks, including:\n\n- AIME 2024 pass@1: 70.0\n- MATH-500 pass@1: 94.5\n- CodeForces Rating: 1633\n\nThe model leverages fine-tuning from DeepSeek R1's outputs, enabling competitive performance comparable to larger frontier models.", - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, + "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Llama 70B", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.1 70B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-llama-70b", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "meta-llama/llama-3.1-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "R1 Distill Llama 70B", - "slug": "deepseek/deepseek-r1-distill-llama-70b", + "short_name": "Llama 3.1 70B Instruct", + "slug": "meta-llama/llama-3.1-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-r1-distill-llama-70b", - "model_variant_slug": "deepseek/deepseek-r1-distill-llama-70b", + "model_variant_permaslug": "meta-llama/llama-3.1-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.1-70b-instruct", "moderation_required": false, - "name": "Together | deepseek/deepseek-r1-distill-llama-70b", + "name": "Together | meta-llama/llama-3.1-70b-instruct", "pricing": { - "completion": "0.000002", + "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000088" }, "provider_display_name": "Together", "provider_info": { @@ -145500,7 +143434,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -145510,14 +143445,12 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "provider_model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", "provider_name": "Together", "provider_region": null, - "provider_slug": "together", - "quantization": "unknown", + "provider_slug": "together/fp8", + "quantization": "fp8", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -145527,57 +143460,47 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_reasoning": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": "", - "start_token": "" - } - }, + "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-r1", - "model_version_group_id": "92d90d33-1fa7-4537-b283-b8199ac69987", - "name": "DeepSeek: R1 Distill Llama 70B", + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.1 70B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-r1-distill-llama-70b", - "reasoning_config": { - "end_token": "", - "start_token": "" - }, + "permaslug": "meta-llama/llama-3.1-70b-instruct", + "reasoning_config": null, "router": null, - "short_name": "R1 Distill Llama 70B", - "slug": "deepseek/deepseek-r1-distill-llama-70b", + "short_name": "Llama 3.1 70B Instruct", + "slug": "meta-llama/llama-3.1-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "essentialai", - "context_length": 32768, - "created_at": "2025-12-07T08:07:27.970616+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-09-25T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Rnj-1 is an 8B-parameter, dense, open-weight model family developed by Essential AI and trained from scratch with a focus on programming, math, and scientific reasoning. The model demonstrates strong performance across multiple programming languages, tool-use workflows, and agentic execution environments (e.g., mini-SWE-agent). ", + "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -145595,7 +143518,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "1d975e42-8769-43db-a998-a227a01985d5", + "id": "94397221-5381-453f-8fc5-7efec5b16bdb", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -145608,60 +143531,40 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "essentialai", - "context_length": 32768, - "created_at": "2025-12-07T08:07:27.970616+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": [], + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-09-25T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Rnj-1 is an 8B-parameter, dense, open-weight model family developed by Essential AI and trained from scratch with a focus on programming, math, and scientific reasoning. The model demonstrates strong performance across multiple programming languages, tool-use workflows, and agentic execution environments (e.g., mini-SWE-agent). ", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "EssentialAI/rnj-1-instruct", + "hf_slug": "meta-llama/Llama-3.2-11B-Vision-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, + "input_modalities": ["text", "image"], + "instruct_type": "llama3", "model_version_group_id": null, - "name": "EssentialAI: Rnj 1 Instruct", + "name": "Meta: Llama 3.2 11B Vision Instruct", "output_modalities": ["text"], - "permaslug": "essentialai/rnj-1-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "reasoning_config": null, "router": null, - "short_name": "Rnj 1 Instruct", - "slug": "essentialai/rnj-1-instruct", - "updated_at": "2025-12-08T15:30:33.048521+00:00", + "short_name": "Llama 3.2 11B Vision Instruct", + "slug": "meta-llama/llama-3.2-11b-vision-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "essentialai/rnj-1-instruct", - "model_variant_slug": "essentialai/rnj-1-instruct", + "model_variant_permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "model_variant_slug": "meta-llama/llama-3.2-11b-vision-instruct", "moderation_required": false, - "name": "Together | essentialai/rnj-1-instruct", + "name": "Together | meta-llama/llama-3.2-11b-vision-instruct", "pricing": { - "completion": "0.00000015", + "completion": "0.00000018", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.00000018" }, "provider_display_name": "Together", "provider_info": { @@ -145814,20 +143717,21 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "Together", "owners": ["{}"], - "slug": "together", + "slug": "together/turbo", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "essentialai/rnj-1-instruct", + "provider_model_id": "meta-llama/Llama-Guard-3-11B-Vision-Turbo", "provider_name": "Together", "provider_region": null, - "provider_slug": "together", + "provider_slug": "together/turbo", "quantization": "unknown", "supported_parameters": [ "max_tokens", @@ -145839,9 +143743,7 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "structured_outputs", - "response_format" + "min_p" ], "supports_multipart": true, "supports_reasoning": false, @@ -145849,48 +143751,37 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "EssentialAI/rnj-1-instruct", + "hf_slug": "meta-llama/Llama-3.2-11B-Vision-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": null, + "input_modalities": ["text", "image"], + "instruct_type": "llama3", "model_version_group_id": null, - "name": "EssentialAI: Rnj 1 Instruct", + "name": "Meta: Llama 3.2 11B Vision Instruct", "output_modalities": ["text"], - "permaslug": "essentialai/rnj-1-instruct", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "reasoning_config": null, "router": null, - "short_name": "Rnj 1 Instruct", - "slug": "essentialai/rnj-1-instruct", - "updated_at": "2025-12-08T15:30:33.048521+00:00", + "short_name": "Llama 3.2 11B Vision Instruct", + "slug": "meta-llama/llama-3.2-11b-vision-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "google", - "context_length": 32768, - "created_at": "2025-05-20T21:33:44.157973+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-09-25T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. [Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -145908,7 +143799,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "2fbebc5f-9439-4d6d-af69-0c7aec1c4392", + "id": "c6a08bcf-449c-4c66-a578-e67fb262ba26", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -145917,59 +143808,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 16384, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "google", - "context_length": 32000, - "created_at": "2025-05-20T21:33:44.157973+00:00", + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-09-25T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks such as text generation, speech recognition, translation, and image analysis. Leveraging innovations like Per-Layer Embedding (PLE) caching and the MatFormer architecture, Gemma 3n dynamically manages memory usage and computational load by selectively activating model parameters, significantly reducing runtime resource requirements.\n\nThis model supports a wide linguistic range (trained in over 140 languages) and features a flexible 32K token context window. Gemma 3n can selectively load parameters, optimizing memory and computational efficiency based on the task or device capabilities, making it well-suited for privacy-focused, offline-capable applications and on-device AI solutions. [Read more in the blog post](https://developers.googleblog.com/en/introducing-gemma-3n/)", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "google/gemma-3n-E4B-it", + "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Google: Gemma 3n 4B", + "name": "Meta: Llama 3.2 3B Instruct", "output_modalities": ["text"], - "permaslug": "google/gemma-3n-e4b-it", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.2-3b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Gemma 3n 4B", - "slug": "google/gemma-3n-e4b-it", + "short_name": "Llama 3.2 3B Instruct", + "slug": "meta-llama/llama-3.2-3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "google/gemma-3n-e4b-it", - "model_variant_slug": "google/gemma-3n-e4b-it", + "model_variant_permaslug": "meta-llama/llama-3.2-3b-instruct", + "model_variant_slug": "meta-llama/llama-3.2-3b-instruct", "moderation_required": false, - "name": "Together | google/gemma-3n-e4b-it", + "name": "Together | meta-llama/llama-3.2-3b-instruct", "pricing": { - "completion": "0.00000004", + "completion": "0.00000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000006" }, "provider_display_name": "Together", "provider_info": { @@ -146122,7 +143998,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -146132,11 +144009,11 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "google/gemma-3n-E4B-it", + "provider_model_id": "meta-llama/Llama-3.2-3B-Instruct-Turbo", "provider_name": "Together", "provider_region": null, - "provider_slug": "together", - "quantization": "unknown", + "provider_slug": "together/fp8", + "quantization": "fp8", "supported_parameters": [ "max_tokens", "temperature", @@ -146155,47 +144032,37 @@ "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Llama3", "has_text_output": true, - "hf_slug": "google/gemma-3n-E4B-it", + "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "llama3", "model_version_group_id": null, - "name": "Google: Gemma 3n 4B", + "name": "Meta: Llama 3.2 3B Instruct", "output_modalities": ["text"], - "permaslug": "google/gemma-3n-e4b-it", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "meta-llama/llama-3.2-3b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Gemma 3n 4B", - "slug": "google/gemma-3n-e4b-it", + "short_name": "Llama 3.2 3B Instruct", + "slug": "meta-llama/llama-3.2-3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -146204,7 +144071,10 @@ "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -146214,7 +144084,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "716c6edf-8c41-4cc3-91d4-66655971bd2e", + "id": "0f0f7609-28d7-40d1-848e-2b2e69d11912", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -146223,49 +144093,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-04-18T00:00:00+00:00", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", "default_parameters": {}, "default_stops": ["<|eot_id|>", "<|end_of_text|>"], "default_system": null, - "description": "Meta's latest class of model (Llama 3) launched with a variety of sizes & flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3 8B Instruct", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-8b-instruct", + "permaslug": "meta-llama/llama-3.3-70b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 8B Instruct", - "slug": "meta-llama/llama-3-8b-instruct", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3-8b-instruct", - "model_variant_slug": "meta-llama/llama-3-8b-instruct", + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", "moderation_required": false, - "name": "Together | meta-llama/llama-3-8b-instruct", + "name": "Together | meta-llama/llama-3.3-70b-instruct", "pricing": { - "completion": "0.0000001", + "completion": "0.00000088", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000088" }, "provider_display_name": "Together", "provider_info": { @@ -146418,7 +144283,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -146428,12 +144294,14 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/Meta-Llama-3-8B-Instruct-Lite", + "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct-Turbo", "provider_name": "Together", "provider_region": null, - "provider_slug": "together/int4", - "quantization": "int4", + "provider_slug": "together/fp8", + "quantization": "fp8", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -146443,45 +144311,47 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3-8B-Instruct", + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "llama3", - "model_version_group_id": "803c32ed-9861-4abf-b5da-7d9c9e6dcf04", - "name": "Meta: Llama 3 8B Instruct", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3-8b-instruct", + "permaslug": "meta-llama/llama-3.3-70b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3 8B Instruct", - "slug": "meta-llama/llama-3-8b-instruct", + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", - "context_length": 10000, - "created_at": "2024-07-23T00:00:00+00:00", + "context_length": 1048576, + "created_at": "2025-04-05T19:37:02.129674+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 10000, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -146490,6 +144360,10 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -146499,7 +144373,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9398d834-55aa-48e8-9f44-10785ef90a7f", + "id": "f55cf482-8f7d-4e7a-832e-aedd59df2637", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -146510,47 +144384,42 @@ "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, - "max_tokens_per_image": null, + "max_tokens_per_image": 3224, "model": { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "context_length": 1048576, + "created_at": "2025-04-05T19:37:02.129674+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The highly anticipated 400B class of Llama3 is here! Clocking in at 128k context with impressive eval scores, the Meta AI team continues to push the frontier of open-source LLMs.\n\nMeta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 405B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models including GPT-4o and Claude 3.5 Sonnet in evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", "features": {}, - "group": "Llama3", + "group": "Llama4", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", - "name": "Meta: Llama 3.1 405B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-405b-instruct", + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 405B Instruct", - "slug": "meta-llama/llama-3.1-405b-instruct", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-405b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-405b-instruct", + "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "model_variant_slug": "meta-llama/llama-4-maverick", "moderation_required": false, - "name": "Together | meta-llama/llama-3.1-405b-instruct", + "name": "Together | meta-llama/llama-4-maverick-17b-128e-instruct", "pricing": { - "completion": "0.0000035", + "completion": "0.00000085", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000035", - "request": "0", - "web_search": "0" + "prompt": "0.00000027" }, "provider_display_name": "Together", "provider_info": { @@ -146703,7 +144572,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -146713,12 +144583,14 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", + "provider_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "provider_name": "Together", "provider_region": null, "provider_slug": "together/fp8", "quantization": "fp8", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -146739,36 +144611,36 @@ "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Llama4", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-405B-Instruct", + "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "1fd9d06b-aa20-4c7d-a0b1-d3d9b5aae712", - "name": "Meta: Llama 3.1 405B Instruct", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama 4 Maverick", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-405b-instruct", + "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 405B Instruct", - "slug": "meta-llama/llama-3.1-405b-instruct", + "short_name": "Llama 4 Maverick", + "slug": "meta-llama/llama-4-maverick", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "context_length": 1048576, + "created_at": "2025-04-30T01:06:33.531556+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 1048576, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -146777,6 +144649,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -146786,10 +144659,10 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "d71a12bc-4c11-42b2-adbb-a23de627c9ff", + "id": "4864c772-7e6a-4fe8-bfc1-40bbaa3a27fd", "is_byok": false, "is_deranked": false, - "is_disabled": false, + "is_disabled": true, "is_free": false, "is_hidden": false, "limit_rpd": null, @@ -146800,44 +144673,39 @@ "max_tokens_per_image": null, "model": { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-07-23T00:00:00+00:00", + "context_length": 163840, + "created_at": "2025-04-30T01:06:33.531556+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes & flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3-1/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "hf_slug": "meta-llama/Llama-Guard-4-12B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.1 70B Instruct", + "input_modalities": ["image", "text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama Guard 4 12B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-70b-instruct", + "permaslug": "meta-llama/llama-guard-4-12b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 70B Instruct", - "slug": "meta-llama/llama-3.1-70b-instruct", + "short_name": "Llama Guard 4 12B", + "slug": "meta-llama/llama-guard-4-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.1-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.1-70b-instruct", + "model_variant_permaslug": "meta-llama/llama-guard-4-12b", + "model_variant_slug": "meta-llama/llama-guard-4-12b", "moderation_required": false, - "name": "Together | meta-llama/llama-3.1-70b-instruct", + "name": "Together | meta-llama/llama-guard-4-12b", "pricing": { - "completion": "0.00000088", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000088", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Together", "provider_info": { @@ -146990,7 +144858,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -147000,11 +144869,11 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", + "provider_model_id": "meta-llama/Llama-Guard-4-12B", "provider_name": "Together", "provider_region": null, - "provider_slug": "together/fp8", - "quantization": "fp8", + "provider_slug": "together", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -147015,47 +144884,45 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "hf_slug": "meta-llama/Llama-Guard-4-12B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.1 70B Instruct", + "input_modalities": ["image", "text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Meta: Llama Guard 4 12B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.1-70b-instruct", + "permaslug": "meta-llama/llama-guard-4-12b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.1 70B Instruct", - "slug": "meta-llama/llama-3.1-70b-instruct", + "short_name": "Llama Guard 4 12B", + "slug": "meta-llama/llama-guard-4-12b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", + "context_length": 8192, + "created_at": "2024-05-13T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 8192, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -147073,7 +144940,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "94397221-5381-453f-8fc5-7efec5b16bdb", + "id": "4f39d8b1-b1e1-4158-ab45-8a3de7674c7b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -147087,44 +144954,39 @@ "max_tokens_per_image": null, "model": { "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", + "context_length": 8192, + "created_at": "2024-05-13T00:00:00+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and visual question answering, bridging the gap between language generation and visual reasoning. Pre-trained on a massive dataset of image-text pairs, it performs well in complex, high-accuracy image analysis.\n\nIts ability to integrate visual understanding with language processing makes it an ideal solution for industries requiring comprehensive visual-linguistic AI applications, such as content creation, AI-driven customer service, and research.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD_VISION.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "hf_slug": "meta-llama/Meta-Llama-Guard-2-8B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "llama3", + "input_modalities": ["text"], + "instruct_type": "none", "model_version_group_id": null, - "name": "Meta: Llama 3.2 11B Vision Instruct", + "name": "Meta: LlamaGuard 2 8B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "permaslug": "meta-llama/llama-guard-2-8b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.2 11B Vision Instruct", - "slug": "meta-llama/llama-3.2-11b-vision-instruct", + "short_name": "LlamaGuard 2 8B", + "slug": "meta-llama/llama-guard-2-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.2-11b-vision-instruct", - "model_variant_slug": "meta-llama/llama-3.2-11b-vision-instruct", + "model_variant_permaslug": "meta-llama/llama-guard-2-8b", + "model_variant_slug": "meta-llama/llama-guard-2-8b", "moderation_required": false, - "name": "Together | meta-llama/llama-3.2-11b-vision-instruct", + "name": "Together | meta-llama/llama-guard-2-8b", "pricing": { - "completion": "0.00000018", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000018", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Together", "provider_info": { @@ -147277,20 +145139,21 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, "name": "Together", "owners": ["{}"], - "slug": "together/turbo", + "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/Llama-Guard-3-11B-Vision-Turbo", + "provider_model_id": "meta-llama/LlamaGuard-2-8b", "provider_name": "Together", "provider_region": null, - "provider_slug": "together/turbo", + "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ "max_tokens", @@ -147313,34 +145176,38 @@ "features": {}, "group": "Llama3", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "hf_slug": "meta-llama/Meta-Llama-Guard-2-8B", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": "llama3", + "input_modalities": ["text"], + "instruct_type": "none", "model_version_group_id": null, - "name": "Meta: Llama 3.2 11B Vision Instruct", + "name": "Meta: LlamaGuard 2 8B", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-11b-vision-instruct", + "permaslug": "meta-llama/llama-guard-2-8b", "reasoning_config": null, "router": null, - "short_name": "Llama 3.2 11B Vision Instruct", - "slug": "meta-llama/llama-3.2-11b-vision-instruct", + "short_name": "LlamaGuard 2 8B", + "slug": "meta-llama/llama-guard-2-8b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "mistralai", + "context_length": 262144, + "created_at": "2025-12-02T13:22:15.851192+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.3, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -147358,7 +145225,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c6a08bcf-449c-4c66-a578-e67fb262ba26", + "id": "ca930689-2105-425e-9aef-d42ea9841c44", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -147367,49 +145234,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-09-25T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "mistralai", + "context_length": 128000, + "created_at": "2025-12-02T13:22:15.851192+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.3, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", - "features": {}, - "group": "Llama3", + "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_slug": "mistralai/Ministral-3-14B-Instruct-2512", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 3.2 3B Instruct", + "name": "Mistral: Ministral 3 14B 2512", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-3b-instruct", - "reasoning_config": null, + "permaslug": "mistralai/ministral-14b-2512", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.2 3B Instruct", - "slug": "meta-llama/llama-3.2-3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Ministral 3 14B 2512", + "slug": "mistralai/ministral-14b-2512", + "updated_at": "2025-12-10T16:54:21.432818+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.2-3b-instruct", - "model_variant_slug": "meta-llama/llama-3.2-3b-instruct", + "model_variant_permaslug": "mistralai/ministral-14b-2512", + "model_variant_slug": "mistralai/ministral-14b-2512", "moderation_required": false, - "name": "Together | meta-llama/llama-3.2-3b-instruct", + "name": "Together | mistralai/ministral-14b-2512", "pricing": { - "completion": "0.00000006", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Together", "provider_info": { @@ -147562,7 +145439,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -147572,11 +145450,11 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/Llama-3.2-3B-Instruct-Turbo", + "provider_model_id": "mistralai/Ministral-3-14B-Instruct-2512", "provider_name": "Together", "provider_region": null, - "provider_slug": "together/fp8", - "quantization": "fp8", + "provider_slug": "together", + "quantization": "unknown", "supported_parameters": [ "max_tokens", "temperature", @@ -147587,45 +145465,62 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tool_choice", + "tools", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Llama3", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_slug": "mistralai/Ministral-3-14B-Instruct-2512", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "llama3", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama 3.2 3B Instruct", + "name": "Mistral: Ministral 3 14B 2512", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.2-3b-instruct", - "reasoning_config": null, + "permaslug": "mistralai/ministral-14b-2512", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.2 3B Instruct", - "slug": "meta-llama/llama-3.2-3b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Ministral 3 14B 2512", + "slug": "mistralai/ministral-14b-2512", + "updated_at": "2025-12-10T16:54:21.432818+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "mistralai", + "context_length": 32768, + "created_at": "2024-05-27T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -147634,10 +145529,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -147647,7 +145538,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "0f0f7609-28d7-40d1-848e-2b2e69d11912", + "id": "dfb95771-12b5-41bb-9607-1e42c9cdaec6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -147656,49 +145547,46 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", - "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "author": "mistralai", + "context_length": 32768, + "created_at": "2024-05-27T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", "features": {}, - "group": "Llama3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": "mistral", + "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", + "name": "Mistral: Mistral 7B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", + "permaslug": "mistralai/mistral-7b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Mistral 7B Instruct", + "slug": "mistralai/mistral-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "mistralai/mistral-7b-instruct", + "model_variant_slug": "mistralai/mistral-7b-instruct", "moderation_required": false, - "name": "Together | meta-llama/llama-3.3-70b-instruct", + "name": "Together | mistralai/mistral-7b-instruct", "pricing": { - "completion": "0.00000088", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000088", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Together", "provider_info": { @@ -147851,7 +145739,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -147861,14 +145750,12 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct-Turbo", + "provider_model_id": "mistralai/Mistral-7B-Instruct-v0.3", "provider_name": "Together", "provider_region": null, - "provider_slug": "together/fp8", - "quantization": "fp8", + "provider_slug": "together", + "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -147878,47 +145765,47 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": "mistral", + "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", + "name": "Mistral: Mistral 7B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", + "permaslug": "mistralai/mistral-7b-instruct", "reasoning_config": null, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Mistral 7B Instruct", + "slug": "mistralai/mistral-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "mistralai", + "context_length": 32768, + "created_at": "2023-12-28T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 1048576, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -147927,10 +145814,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -147940,7 +145823,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f55cf482-8f7d-4e7a-832e-aedd59df2637", + "id": "f2583ec3-cf43-4a91-8328-b22496daa629", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -147951,47 +145834,44 @@ "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, - "max_tokens_per_image": 3224, + "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:37:02.129674+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "mistralai", + "context_length": 32768, + "created_at": "2023-12-28T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total). It supports multilingual text and image input, and produces multilingual text and code output across 12 supported languages. Optimized for vision-language tasks, Maverick is instruction-tuned for assistant-like behavior, image reasoning, and general-purpose multimodal interaction.\n\nMaverick features early fusion for native multimodality and a 1 million token context window. It was trained on a curated mixture of public, licensed, and Meta-platform data, covering ~22 trillion tokens, with a knowledge cutoff in August 2024. Released on April 5, 2025 under the Llama 4 Community License, Maverick is suited for research and commercial applications requiring advanced multimodal understanding and high model throughput.", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention", "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "mistralai/Mistral-7B-Instruct-v0.2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "input_modalities": ["text"], + "instruct_type": "mistral", + "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", + "name": "Mistral: Mistral 7B Instruct v0.2", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "permaslug": "mistralai/mistral-7b-instruct-v0.2", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Mistral 7B Instruct v0.2", + "slug": "mistralai/mistral-7b-instruct-v0.2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", - "model_variant_slug": "meta-llama/llama-4-maverick", + "model_variant_permaslug": "mistralai/mistral-7b-instruct-v0.2", + "model_variant_slug": "mistralai/mistral-7b-instruct-v0.2", "moderation_required": false, - "name": "Together | meta-llama/llama-4-maverick-17b-128e-instruct", + "name": "Together | mistralai/mistral-7b-instruct-v0.2", "pricing": { - "completion": "0.00000085", + "completion": "0.0000002", "discount": 0, - "image": "0.00090234", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000027", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Together", "provider_info": { @@ -148144,7 +146024,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -148154,14 +146035,12 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", + "provider_model_id": "mistralai/Mistral-7B-Instruct-v0.2", "provider_name": "Together", "provider_region": null, - "provider_slug": "together/fp8", - "quantization": "fp8", + "provider_slug": "together", + "quantization": "unknown", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -148171,47 +146050,47 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", + "hf_slug": "mistralai/Mistral-7B-Instruct-v0.2", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Meta: Llama 4 Maverick", + "input_modalities": ["text"], + "instruct_type": "mistral", + "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", + "name": "Mistral: Mistral 7B Instruct v0.2", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-maverick-17b-128e-instruct", + "permaslug": "mistralai/mistral-7b-instruct-v0.2", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Maverick", - "slug": "meta-llama/llama-4-maverick", + "short_name": "Mistral 7B Instruct v0.2", + "slug": "mistralai/mistral-7b-instruct-v0.2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-05T19:31:59.735804+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "mistralai", + "context_length": 32768, + "created_at": "2024-05-27T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 1048576, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -148220,7 +146099,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -148230,7 +146108,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "24fa6d0d-b679-4e6d-8c94-b7581400ec92", + "id": "9b3b4c40-ab22-4a54-ab9f-6dd78b03a2b6", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -148239,49 +146117,46 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 4096, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 10000000, - "created_at": "2025-04-05T19:31:59.735804+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "mistralai", + "context_length": 32768, + "created_at": "2024-05-27T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input (text and image) and multilingual output (text and code) across 12 supported languages. Designed for assistant-style interaction and visual reasoning, Scout uses 16 experts per forward pass and features a context length of 10 million tokens, with a training corpus of ~40 trillion tokens.\n\nBuilt for high efficiency and local or commercial deployment, Llama 4 Scout incorporates early fusion for seamless modality integration. It is instruction-tuned for use in multilingual chat, captioning, and image understanding tasks. Released under the Llama 4 Community License, it was last trained on data up to August 2024 and launched publicly on April 5, 2025.", + "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "input_modalities": ["text"], + "instruct_type": "mistral", + "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", + "name": "Mistral: Mistral 7B Instruct v0.3", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "permaslug": "mistralai/mistral-7b-instruct-v0.3", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Mistral 7B Instruct v0.3", + "slug": "mistralai/mistral-7b-instruct-v0.3", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", - "model_variant_slug": "meta-llama/llama-4-scout", + "model_variant_permaslug": "mistralai/mistral-7b-instruct-v0.3", + "model_variant_slug": "mistralai/mistral-7b-instruct-v0.3", "moderation_required": false, - "name": "Together | meta-llama/llama-4-scout-17b-16e-instruct", + "name": "Together | mistralai/mistral-7b-instruct-v0.3", "pricing": { - "completion": "0.00000059", + "completion": "0.0000002", "discount": 0, - "image": "0.00090234", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000018", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Together", "provider_info": { @@ -148434,7 +146309,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -148444,7 +146320,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "provider_model_id": "mistralai/Mistral-7B-Instruct-v0.3", "provider_name": "Together", "provider_region": null, "provider_slug": "together", @@ -148459,47 +146335,49 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama4", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], - "instruct_type": null, - "model_version_group_id": null, - "name": "Meta: Llama 4 Scout", + "input_modalities": ["text"], + "instruct_type": "mistral", + "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", + "name": "Mistral: Mistral 7B Instruct v0.3", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-4-scout-17b-16e-instruct", + "permaslug": "mistralai/mistral-7b-instruct-v0.3", "reasoning_config": null, "router": null, - "short_name": "Llama 4 Scout", - "slug": "meta-llama/llama-4-scout", + "short_name": "Mistral 7B Instruct v0.3", + "slug": "mistralai/mistral-7b-instruct-v0.3", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 1048576, - "created_at": "2025-04-30T01:06:33.531556+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 32768, + "created_at": "2025-01-30T16:43:29.33592+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.3, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", + "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 1048576, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -148508,7 +146386,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -148518,58 +146395,68 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "4864c772-7e6a-4fe8-bfc1-40bbaa3a27fd", + "id": "b5615182-891f-418e-841a-246efbe6749c", "is_byok": false, "is_deranked": false, - "is_disabled": true, + "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 163840, - "created_at": "2025-04-30T01:06:33.531556+00:00", - "default_parameters": {}, + "author": "mistralai", + "context_length": 32768, + "created_at": "2025-01-30T16:43:29.33592+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.3, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification) and in LLM responses (response classification). It acts as an LLM—generating text in its output that indicates whether a given prompt or response is safe or unsafe, and if unsafe, it also lists the content categories violated.\n\nLlama Guard 4 was aligned to safeguard against the standardized MLCommons hazards taxonomy and designed to support multimodal Llama 4 capabilities. Specifically, it combines features from previous Llama Guard models, providing content moderation for English and multiple supported languages, along with enhanced capabilities to handle mixed text-and-image prompts, including multiple images. Additionally, Llama Guard 4 is integrated into the Llama Moderations API, extending robust safety classification to text and images.", - "features": {}, - "group": "Other", + "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-4-12B", + "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama Guard 4 12B", + "name": "Mistral: Mistral Small 3", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-4-12b", - "reasoning_config": null, + "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama Guard 4 12B", - "slug": "meta-llama/llama-guard-4-12b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Mistral Small 3", + "slug": "mistralai/mistral-small-24b-instruct-2501", + "updated_at": "2025-12-16T18:22:59.07006+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-guard-4-12b", - "model_variant_slug": "meta-llama/llama-guard-4-12b", + "model_variant_permaslug": "mistralai/mistral-small-24b-instruct-2501", + "model_variant_slug": "mistralai/mistral-small-24b-instruct-2501", "moderation_required": false, - "name": "Together | meta-llama/llama-guard-4-12b", + "name": "Together | mistralai/mistral-small-24b-instruct-2501", "pricing": { - "completion": "0.0000002", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Together", "provider_info": { @@ -148722,7 +146609,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -148732,7 +146620,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/Llama-Guard-4-12B", + "provider_model_id": "mistralai/Mistral-Small-24B-Instruct-2501", "provider_name": "Together", "provider_region": null, "provider_slug": "together", @@ -148747,45 +146635,60 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Other", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Llama-Guard-4-12B", + "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Meta: Llama Guard 4 12B", + "name": "Mistral: Mistral Small 3", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-4-12b", - "reasoning_config": null, + "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama Guard 4 12B", - "slug": "meta-llama/llama-guard-4-12b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Mistral Small 3", + "slug": "mistralai/mistral-small-24b-instruct-2501", + "updated_at": "2025-12-16T18:22:59.07006+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-05-13T00:00:00+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "mistralai", + "context_length": 32768, + "created_at": "2023-12-10T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 8192, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -148803,7 +146706,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "4f39d8b1-b1e1-4158-ab45-8a3de7674c7b", + "id": "2334f702-4b18-4df0-b22e-c99bc7340b90", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -148812,49 +146715,46 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 8192, - "created_at": "2024-05-13T00:00:00+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "mistralai", + "context_length": 32768, + "created_at": "2023-12-10T00:00:00+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": ["[INST]", ""], "default_system": null, - "description": "This safeguard model has 8B parameters and is based on the Llama 3 family. Just like is predecessor, [LlamaGuard 1](https://huggingface.co/meta-llama/LlamaGuard-7b), it can do both prompt and response classification.\n\nLlamaGuard 2 acts as a normal LLM would, generating text that indicates whether the given input/output is safe/unsafe. If deemed unsafe, it will also share the content categories violated.\n\nFor best results, please use raw prompt input or the `/completions` endpoint, instead of the chat API.\n\nIt has demonstrated strong performance compared to leading closed-source models in human evaluations.\n\nTo read more about the model release, [click here](https://ai.meta.com/blog/meta-llama-3/). Usage of this model is subject to [Meta's Acceptable Use Policy](https://llama.meta.com/llama3/use-policy/).", + "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", "features": {}, - "group": "Llama3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-Guard-2-8B", + "hf_slug": "mistralai/Mixtral-8x7B-Instruct-v0.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "none", + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Meta: LlamaGuard 2 8B", + "name": "Mistral: Mixtral 8x7B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-2-8b", + "permaslug": "mistralai/mixtral-8x7b-instruct", "reasoning_config": null, "router": null, - "short_name": "LlamaGuard 2 8B", - "slug": "meta-llama/llama-guard-2-8b", + "short_name": "Mixtral 8x7B Instruct", + "slug": "mistralai/mixtral-8x7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-guard-2-8b", - "model_variant_slug": "meta-llama/llama-guard-2-8b", + "model_variant_permaslug": "mistralai/mixtral-8x7b-instruct", + "model_variant_slug": "mistralai/mixtral-8x7b-instruct", "moderation_required": false, - "name": "Together | meta-llama/llama-guard-2-8b", + "name": "Together | mistralai/mixtral-8x7b-instruct", "pricing": { - "completion": "0.0000002", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Together", "provider_info": { @@ -149007,7 +146907,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -149017,7 +146918,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "meta-llama/LlamaGuard-2-8b", + "provider_model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "provider_name": "Together", "provider_region": null, "provider_slug": "together", @@ -149032,45 +146933,43 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": {}, - "group": "Llama3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "meta-llama/Meta-Llama-Guard-2-8B", + "hf_slug": "mistralai/Mixtral-8x7B-Instruct-v0.1", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "none", + "instruct_type": "mistral", "model_version_group_id": null, - "name": "Meta: LlamaGuard 2 8B", + "name": "Mistral: Mixtral 8x7B Instruct", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-guard-2-8b", + "permaslug": "mistralai/mixtral-8x7b-instruct", "reasoning_config": null, "router": null, - "short_name": "LlamaGuard 2 8B", - "slug": "meta-llama/llama-guard-2-8b", + "short_name": "Mixtral 8x7B Instruct", + "slug": "mistralai/mixtral-8x7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", + "author": "moonshotai", "context_length": 262144, - "created_at": "2025-12-02T13:22:15.851192+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.3, - "top_p": null - }, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, @@ -149083,6 +146982,11 @@ "training": false }, "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -149092,7 +146996,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "ca930689-2105-425e-9aef-d42ea9841c44", + "id": "92d45684-e3bf-4f16-a9c4-57b4762df4c3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -149105,60 +147009,50 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 128000, - "created_at": "2025-12-02T13:22:15.851192+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.3, - "top_p": null - }, + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-09-04T21:25:47.673205+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language model with vision capabilities.", + "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Mistral", + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Ministral-3-14B-Instruct-2512", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 3 14B 2512", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-14b-2512", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Ministral 3 14B 2512", - "slug": "mistralai/ministral-14b-2512", - "updated_at": "2025-12-10T16:54:21.432818+00:00", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/ministral-14b-2512", - "model_variant_slug": "mistralai/ministral-14b-2512", + "model_variant_permaslug": "moonshotai/kimi-k2-0905", + "model_variant_slug": "moonshotai/kimi-k2-0905", "moderation_required": false, - "name": "Together | mistralai/ministral-14b-2512", + "name": "Together | moonshotai/kimi-k2-0905", "pricing": { - "completion": "0.0000002", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.000001" }, "provider_display_name": "Together", "provider_info": { @@ -149311,7 +147205,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -149321,12 +147216,14 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "mistralai/Ministral-3-14B-Instruct-2512", + "provider_model_id": "moonshotai/Kimi-K2-Instruct-0905", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -149337,10 +147234,8 @@ "repetition_penalty", "logit_bias", "min_p", - "tool_choice", "tools", - "structured_outputs", - "response_format" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, @@ -149349,49 +147244,50 @@ "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null } }, - "group": "Mistral", + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Ministral-3-14B-Instruct-2512", + "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Ministral 3 14B 2512", + "name": "MoonshotAI: Kimi K2 0905", "output_modalities": ["text"], - "permaslug": "mistralai/ministral-14b-2512", + "permaslug": "moonshotai/kimi-k2-0905", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Ministral 3 14B 2512", - "slug": "mistralai/ministral-14b-2512", - "updated_at": "2025-12-10T16:54:21.432818+00:00", + "short_name": "Kimi K2 0905", + "slug": "moonshotai/kimi-k2-0905", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2024-05-27T00:00:00+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -149400,6 +147296,8 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -149409,7 +147307,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "dfb95771-12b5-41bb-9607-1e42c9cdaec6", + "id": "5ac3fc1e-e83e-412f-9221-34b5e8919b3a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -149418,51 +147316,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32768, - "created_at": "2024-05-27T00:00:00+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2025-11-06T14:50:22.752525+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\n*Mistral 7B Instruct has multiple version variants, and this is intended to be the latest version.*", - "features": {}, - "group": "Mistral", + "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", - "name": "Mistral: Mistral 7B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-7b-instruct", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral 7B Instruct", - "slug": "mistralai/mistral-7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-7b-instruct", - "model_variant_slug": "mistralai/mistral-7b-instruct", + "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", + "model_variant_slug": "moonshotai/kimi-k2-thinking", "moderation_required": false, - "name": "Together | mistralai/mistral-7b-instruct", + "name": "Together | moonshotai/kimi-k2-thinking-20251106", "pricing": { - "completion": "0.0000002", + "completion": "0.000004", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000012" }, "provider_display_name": "Together", "provider_info": { @@ -149615,7 +147521,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -149625,12 +147532,14 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "mistralai/Mistral-7B-Instruct-v0.3", + "provider_model_id": "moonshotai/Kimi-K2-Thinking", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -149640,47 +147549,63 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tool_choice", + "tools", + "structured_outputs" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", + "hf_slug": "moonshotai/Kimi-K2-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", - "name": "Mistral: Mistral 7B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2 Thinking", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-7b-instruct", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2-thinking-20251106", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral 7B Instruct", - "slug": "mistralai/mistral-7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2 Thinking", + "slug": "moonshotai/kimi-k2-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2023-12-28T00:00:00+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -149689,6 +147614,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -149698,7 +147624,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "f2583ec3-cf43-4a91-8328-b22496daa629", + "id": "3438c28e-ebcb-4f46-8bab-f133912d5297", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -149711,47 +147637,53 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32768, - "created_at": "2023-12-28T00:00:00+00:00", + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention", - "features": {}, - "group": "Mistral", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-7B-Instruct-v0.2", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", - "name": "Mistral: Mistral 7B Instruct v0.2", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-7b-instruct-v0.2", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2.5-0127", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "Mistral 7B Instruct v0.2", - "slug": "mistralai/mistral-7b-instruct-v0.2", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-7b-instruct-v0.2", - "model_variant_slug": "mistralai/mistral-7b-instruct-v0.2", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "Together | mistralai/mistral-7b-instruct-v0.2", + "name": "Together | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.0000002", + "completion": "0.0000028", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, "provider_display_name": "Together", "provider_info": { @@ -149904,7 +147836,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -149914,12 +147847,14 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "mistralai/Mistral-7B-Instruct-v0.2", + "provider_model_id": "moonshotai/Kimi-K2.5", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -149932,44 +147867,51 @@ "min_p" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-7B-Instruct-v0.2", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", - "name": "Mistral: Mistral 7B Instruct v0.2", + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-7b-instruct-v0.2", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2.5-0127", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "Mistral 7B Instruct v0.2", - "slug": "mistralai/mistral-7b-instruct-v0.2", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2024-05-27T00:00:00+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": ["[INST]", ""], + "author": "nvidia", + "context_length": 131072, + "created_at": "2025-09-05T21:13:27.486887+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -149987,7 +147929,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9b3b4c40-ab22-4a54-ab9f-6dd78b03a2b6", + "id": "362c51e8-b832-4ef5-ac0b-3a3ec4286d5f", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -149996,51 +147938,54 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 4096, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32768, - "created_at": "2024-05-27T00:00:00+00:00", - "default_parameters": { - "temperature": 0.3 - }, - "default_stops": ["[INST]", ""], + "author": "nvidia", + "context_length": 32000, + "created_at": "2025-09-05T21:13:27.486887+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct v0.2](/models/mistralai/mistral-7b-instruct-v0.2), with the following changes:\n\n- Extended vocabulary to 32768\n- Supports v3 Tokenizer\n- Supports function calling\n\nNOTE: Support for function calling depends on the provider.", - "features": {}, - "group": "Mistral", + "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", - "name": "Mistral: Mistral 7B Instruct v0.3", + "instruct_type": null, + "model_version_group_id": null, + "name": "NVIDIA: Nemotron Nano 9B V2", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-7b-instruct-v0.3", - "reasoning_config": null, + "permaslug": "nvidia/nemotron-nano-9b-v2", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral 7B Instruct v0.3", - "slug": "mistralai/mistral-7b-instruct-v0.3", + "short_name": "Nemotron Nano 9B V2", + "slug": "nvidia/nemotron-nano-9b-v2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-7b-instruct-v0.3", - "model_variant_slug": "mistralai/mistral-7b-instruct-v0.3", + "model_variant_permaslug": "nvidia/nemotron-nano-9b-v2", + "model_variant_slug": "nvidia/nemotron-nano-9b-v2", "moderation_required": false, - "name": "Together | mistralai/mistral-7b-instruct-v0.3", + "name": "Together | nvidia/nemotron-nano-9b-v2", "pricing": { - "completion": "0.0000002", + "completion": "0.00000025", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000006" }, "provider_display_name": "Together", "provider_info": { @@ -150193,7 +148138,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -150203,12 +148149,14 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "mistralai/Mistral-7B-Instruct-v0.3", + "provider_model_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -150218,49 +148166,61 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tools", + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_reasoning": true, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "mistralai/Mistral-7B-Instruct-v0.3", + "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", - "model_version_group_id": "1d07cc56-c54d-4587-b785-5093496397a4", - "name": "Mistral: Mistral 7B Instruct v0.3", + "instruct_type": null, + "model_version_group_id": null, + "name": "NVIDIA: Nemotron Nano 9B V2", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-7b-instruct-v0.3", - "reasoning_config": null, + "permaslug": "nvidia/nemotron-nano-9b-v2", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, "router": null, - "short_name": "Mistral 7B Instruct v0.3", - "slug": "mistralai/mistral-7b-instruct-v0.3", + "short_name": "Nemotron Nano 9B V2", + "slug": "nvidia/nemotron-nano-9b-v2", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2025-01-30T16:43:29.33592+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.3, + "temperature": null, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -150269,6 +148229,11 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "structured_outputs": true + }, + "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -150278,7 +148243,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "b5615182-891f-418e-841a-246efbe6749c", + "id": "c7cc0654-cfe3-4531-bd8a-68cd162ba020", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -150287,64 +148252,59 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32768, - "created_at": "2025-01-30T16:43:29.33592+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.3, + "temperature": null, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed for efficient local deployment.\n\nThe model achieves 81% accuracy on the MMLU benchmark and performs competitively with larger models like Llama 3.3 70B and Qwen 32B, while operating at three times the speed on equivalent hardware. [Read the blog post about the model here.](https://mistral.ai/news/mistral-small-3/)", + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Mistral", + "group": "GPT", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Mistral Small 3", - "slug": "mistralai/mistral-small-24b-instruct-2501", - "updated_at": "2025-12-16T18:22:59.07006+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mistral-small-24b-instruct-2501", - "model_variant_slug": "mistralai/mistral-small-24b-instruct-2501", + "model_variant_permaslug": "openai/gpt-oss-120b", + "model_variant_slug": "openai/gpt-oss-120b", "moderation_required": false, - "name": "Together | mistralai/mistral-small-24b-instruct-2501", + "name": "Together | openai/gpt-oss-120b", "pricing": { - "completion": "0.0000003", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Together", "provider_info": { @@ -150497,7 +148457,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -150507,12 +148468,15 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "mistralai/Mistral-Small-24B-Instruct-2501", + "provider_model_id": "openai/gpt-oss-120b", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", "max_tokens", "temperature", "top_p", @@ -150527,7 +148491,7 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" @@ -150535,47 +148499,49 @@ "features": { "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Mistral", + "group": "GPT", "has_text_output": true, - "hf_slug": "mistralai/Mistral-Small-24B-Instruct-2501", + "hf_slug": "openai/gpt-oss-120b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mistral Small 3", + "name": "OpenAI: gpt-oss-120b", "output_modalities": ["text"], - "permaslug": "mistralai/mistral-small-24b-instruct-2501", + "permaslug": "openai/gpt-oss-120b", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Mistral Small 3", - "slug": "mistralai/mistral-small-24b-instruct-2501", - "updated_at": "2025-12-16T18:22:59.07006+00:00", + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "mistralai", - "context_length": 32768, - "created_at": "2023-12-10T00:00:00+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -150584,6 +148550,7 @@ "training": false }, "features": { + "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -150593,7 +148560,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "2334f702-4b18-4df0-b22e-c99bc7340b90", + "id": "10107fa7-70af-44d9-9c95-1e032f704752", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -150602,51 +148569,61 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "mistralai", - "context_length": 32768, - "created_at": "2023-12-10T00:00:00+00:00", + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:09+00:00", "default_parameters": { - "temperature": 0.3 + "frequency_penalty": null, + "temperature": null, + "top_p": null }, - "default_stops": ["[INST]", ""], + "default_stops": [], "default_system": null, - "description": "Mixtral 8x7B Instruct is a pretrained generative Sparse Mixture of Experts, by Mistral AI, for chat and instruction use. Incorporates 8 experts (feed-forward networks) for a total of 47 billion parameters.\n\nInstruct model fine-tuned by Mistral. #moe", - "features": {}, - "group": "Mistral", + "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mixtral 8x7B Instruct", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "mistralai/mixtral-8x7b-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-20b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mixtral 8x7B Instruct", - "slug": "mistralai/mixtral-8x7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "mistralai/mixtral-8x7b-instruct", - "model_variant_slug": "mistralai/mixtral-8x7b-instruct", + "model_variant_permaslug": "openai/gpt-oss-20b", + "model_variant_slug": "openai/gpt-oss-20b", "moderation_required": false, - "name": "Together | mistralai/mixtral-8x7b-instruct", + "name": "Together | openai/gpt-oss-20b", "pricing": { - "completion": "0.0000006", + "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.00000005" }, "provider_display_name": "Together", "provider_info": { @@ -150799,7 +148776,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -150809,12 +148787,14 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "provider_model_id": "openai/gpt-oss-20b", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -150824,47 +148804,62 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, - "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_reasoning": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": {}, - "group": "Mistral", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": null + }, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "GPT", "has_text_output": true, - "hf_slug": "mistralai/Mixtral-8x7B-Instruct-v0.1", + "hf_slug": "openai/gpt-oss-20b", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "mistral", + "instruct_type": null, "model_version_group_id": null, - "name": "Mistral: Mixtral 8x7B Instruct", + "name": "OpenAI: gpt-oss-20b", "output_modalities": ["text"], - "permaslug": "mistralai/mixtral-8x7b-instruct", - "reasoning_config": null, + "permaslug": "openai/gpt-oss-20b", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Mixtral 8x7B Instruct", - "slug": "mistralai/mixtral-8x7b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "gpt-oss-20b", + "slug": "openai/gpt-oss-20b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "qwen", + "context_length": 32768, + "created_at": "2024-10-16T00:00:00+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -150873,11 +148868,6 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -150887,7 +148877,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "92d45684-e3bf-4f16-a9c4-57b4762df4c3", + "id": "54084a6f-ed4c-4333-bfb0-5b57cb736105", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -150896,17 +148886,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-09-04T21:25:47.673205+00:00", - "default_parameters": {}, - "default_stops": [], + "author": "qwen", + "context_length": 131072, + "created_at": "2024-10-16T00:00:00+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", + "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "features": { "reasoning_config": { "end_token": null, @@ -150914,41 +148908,36 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen2.5-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen2.5 7B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "qwen/qwen-2.5-7b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen2.5 7B Instruct", + "slug": "qwen/qwen-2.5-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-0905", - "model_variant_slug": "moonshotai/kimi-k2-0905", + "model_variant_permaslug": "qwen/qwen-2.5-7b-instruct", + "model_variant_slug": "qwen/qwen-2.5-7b-instruct", "moderation_required": false, - "name": "Together | moonshotai/kimi-k2-0905", + "name": "Together | qwen/qwen-2.5-7b-instruct", "pricing": { - "completion": "0.000003", + "completion": "0.0000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000003" }, "provider_display_name": "Together", "provider_info": { @@ -151101,7 +149090,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -151111,14 +149101,12 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "moonshotai/Kimi-K2-Instruct-0905", + "provider_model_id": "Qwen/Qwen2.5-7B-Instruct-Turbo", "provider_name": "Together", "provider_region": null, - "provider_slug": "together", - "quantization": "unknown", + "provider_slug": "together/fp8", + "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -151128,13 +149116,11 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -151145,44 +149131,40 @@ "system_prompt": null } }, - "group": "Other", + "group": "Qwen", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Instruct-0905", + "hf_slug": "Qwen/Qwen2.5-7B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 0905", + "name": "Qwen: Qwen2.5 7B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-0905", + "permaslug": "qwen/qwen-2.5-7b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Kimi K2 0905", - "slug": "moonshotai/kimi-k2-0905", + "short_name": "Qwen2.5 7B Instruct", + "slug": "qwen/qwen-2.5-7b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 32768, + "created_at": "2025-02-01T11:45:11.997326+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 32768, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -151191,8 +149173,7 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -151202,7 +149183,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5ac3fc1e-e83e-412f-9221-34b5e8919b3a", + "id": "420ebd09-e91c-484e-98cf-3d6c9d43e73c", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -151215,60 +149196,40 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "moonshotai", - "context_length": 262144, - "created_at": "2025-11-06T14:50:22.752525+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 131072, + "created_at": "2025-02-01T11:45:11.997326+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "Qwen: Qwen2.5 VL 72B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen2.5 VL 72B Instruct", + "slug": "qwen/qwen2.5-vl-72b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "moonshotai/kimi-k2-thinking-20251106", - "model_variant_slug": "moonshotai/kimi-k2-thinking", + "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", "moderation_required": false, - "name": "Together | moonshotai/kimi-k2-thinking-20251106", + "name": "Together | qwen/qwen2.5-vl-72b-instruct", "pricing": { - "completion": "0.000004", + "completion": "0.000008", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000012", - "request": "0", - "web_search": "0" + "prompt": "0.00000195" }, "provider_display_name": "Together", "provider_info": { @@ -151421,7 +149382,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -151431,14 +149393,12 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "moonshotai/Kimi-K2-Thinking", + "provider_model_id": "Qwen/Qwen2.5-VL-72B-Instruct", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -151448,59 +149408,45 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tool_choice", - "tools", - "structured_outputs" + "min_p" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } - }, - "group": "Other", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "moonshotai/Kimi-K2-Thinking", + "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "MoonshotAI: Kimi K2 Thinking", + "name": "Qwen: Qwen2.5 VL 72B Instruct", "output_modalities": ["text"], - "permaslug": "moonshotai/kimi-k2-thinking-20251106", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "qwen/qwen2.5-vl-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Kimi K2 Thinking", - "slug": "moonshotai/kimi-k2-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen2.5 VL 72B Instruct", + "slug": "qwen/qwen2.5-vl-72b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "nvidia", - "context_length": 131072, - "created_at": "2025-09-05T21:13:27.486887+00:00", + "author": "qwen", + "context_length": 40960, + "created_at": "2025-04-28T21:29:17.25671+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", + "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 40960, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -151509,6 +149455,7 @@ "training": false }, "features": { + "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -151518,7 +149465,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "362c51e8-b832-4ef5-ac0b-3a3ec4286d5f", + "id": "171c8c1d-75ea-415c-8cc1-1fda69234e8d", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -151531,55 +149478,48 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "nvidia", - "context_length": 32000, - "created_at": "2025-09-05T21:13:27.486887+00:00", + "author": "qwen", + "context_length": 131072, + "created_at": "2025-04-28T21:29:17.25671+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and tasks by first generating a reasoning trace and then concluding with a final response. \n\nThe model's reasoning capabilities can be controlled via a system prompt. If the user prefers the model to provide its final answer without intermediate reasoning traces, it can be configured to do so.", + "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", "features": { "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", + "hf_slug": "Qwen/Qwen3-235B-A22B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "NVIDIA: Nemotron Nano 9B V2", + "name": "Qwen: Qwen3 235B A22B", "output_modalities": ["text"], - "permaslug": "nvidia/nemotron-nano-9b-v2", + "permaslug": "qwen/qwen3-235b-a22b-04-28", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "Nemotron Nano 9B V2", - "slug": "nvidia/nemotron-nano-9b-v2", + "short_name": "Qwen3 235B A22B", + "slug": "qwen/qwen3-235b-a22b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "nvidia/nemotron-nano-9b-v2", - "model_variant_slug": "nvidia/nemotron-nano-9b-v2", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-04-28", + "model_variant_slug": "qwen/qwen3-235b-a22b", "moderation_required": false, - "name": "Together | nvidia/nemotron-nano-9b-v2", + "name": "Together | qwen/qwen3-235b-a22b-04-28", "pricing": { - "completion": "0.00000025", + "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Together", "provider_info": { @@ -151732,7 +149672,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -151742,11 +149683,11 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", + "provider_model_id": "Qwen/Qwen3-235B-A22B-fp8-tput", "provider_name": "Together", "provider_region": null, - "provider_slug": "together", - "quantization": "unknown", + "provider_slug": "together/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", @@ -151759,61 +149700,53 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "nvidia/NVIDIA-Nemotron-Nano-9B-v2", + "hf_slug": "Qwen/Qwen3-235B-A22B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "NVIDIA: Nemotron Nano 9B V2", + "name": "Qwen: Qwen3 235B A22B", "output_modalities": ["text"], - "permaslug": "nvidia/nemotron-nano-9b-v2", + "permaslug": "qwen/qwen3-235b-a22b-04-28", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "Nemotron Nano 9B V2", - "slug": "nvidia/nemotron-nano-9b-v2", + "short_name": "Qwen3 235B A22B", + "slug": "qwen/qwen3-235b-a22b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -151822,11 +149755,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "structured_outputs": true - }, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -151836,7 +149764,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "c7cc0654-cfe3-4531-bd8a-68cd162ba020", + "id": "48da8dd9-87ca-4faf-85ff-a62b3a313a35", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -151849,60 +149777,50 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-21T17:39:15.880992+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", + "model_variant_slug": "qwen/qwen3-235b-a22b-2507", "moderation_required": false, - "name": "Together | openai/gpt-oss-120b", + "name": "Together | qwen/qwen3-235b-a22b-07-25", "pricing": { "completion": "0.0000006", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Together", "provider_info": { @@ -152055,7 +149973,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -152065,15 +149984,12 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "openai/gpt-oss-120b", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507-tput", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", "max_tokens", "temperature", "top_p", @@ -152088,57 +150004,56 @@ "tool_choice" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Qwen: Qwen3 235B A22B Instruct 2507", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "qwen/qwen3-235b-a22b-07-25", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Instruct 2507", + "slug": "qwen/qwen3-235b-a22b-2507", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-25T13:19:17.179049+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -152147,7 +150062,6 @@ "training": false }, "features": { - "is_mandatory_reasoning": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -152157,7 +150071,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "10107fa7-70af-44d9-9c95-1e032f704752", + "id": "638a3108-ef85-47db-9845-7cc29e9fe42b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -152170,62 +150084,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:09+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-07-25T13:19:17.179049+00:00", "default_parameters": { "frequency_penalty": null, "temperature": null, "top_p": null }, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for lower-latency inference and deployability on consumer or single-GPU hardware. The model is trained in OpenAI’s Harmony response format and supports reasoning level configuration, fine-tuning, and agentic capabilities including function calling, tool use, and structured outputs.", + "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-20b", - "model_variant_slug": "openai/gpt-oss-20b", + "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", "moderation_required": false, - "name": "Together | openai/gpt-oss-20b", + "name": "Together | qwen/qwen3-235b-a22b-thinking-2507", "pricing": { - "completion": "0.0000002", + "completion": "0.000003", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000065" }, "provider_display_name": "Together", "provider_info": { @@ -152378,7 +150285,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -152388,7 +150296,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "openai/gpt-oss-20b", + "provider_model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", "provider_name": "Together", "provider_region": null, "provider_slug": "together", @@ -152405,62 +150313,58 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": { - "should_hoist_and_merge_system_messages": null - }, + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "GPT", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "openai/gpt-oss-20b", + "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "OpenAI: gpt-oss-20b", + "name": "Qwen: Qwen3 235B A22B Thinking 2507", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-20b", + "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-20b", - "slug": "openai/gpt-oss-20b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "Qwen3 235B A22B Thinking 2507", + "slug": "qwen/qwen3-235b-a22b-thinking-2507", + "updated_at": "2026-01-08T20:02:38.719902+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 32768, - "created_at": "2024-10-16T00:00:00+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "context_length": 262144, + "created_at": "2025-07-23T00:29:06+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -152478,7 +150382,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "54084a6f-ed4c-4333-bfb0-5b57cb736105", + "id": "443141dd-d524-4980-bcd8-097b34ec0025", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -152487,21 +150391,17 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2024-10-16T00:00:00+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": null, - "top_p": null - }, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "context_length": 1048576, + "created_at": "2025-07-23T00:29:06+00:00", + "default_parameters": {}, + "default_stops": [], "default_system": null, - "description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "features": { "reasoning_config": { "end_token": null, @@ -152509,41 +150409,36 @@ "system_prompt": null } }, - "group": "Qwen", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-7B-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 7B Instruct", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-7b-instruct", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen2.5 7B Instruct", - "slug": "qwen/qwen-2.5-7b-instruct", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2.5-7b-instruct", - "model_variant_slug": "qwen/qwen-2.5-7b-instruct", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "model_variant_slug": "qwen/qwen3-coder", "moderation_required": false, - "name": "Together | qwen/qwen-2.5-7b-instruct", + "name": "Together | qwen/qwen3-coder-480b-a35b-07-25", "pricing": { - "completion": "0.0000003", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "prompt": "0.000002" }, "provider_display_name": "Together", "provider_info": { @@ -152696,7 +150591,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -152706,7 +150602,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "Qwen/Qwen2.5-7B-Instruct-Turbo", + "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", "provider_name": "Together", "provider_region": null, "provider_slug": "together/fp8", @@ -152721,11 +150617,13 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, @@ -152736,40 +150634,44 @@ "system_prompt": null } }, - "group": "Qwen", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-7B-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "chatml", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 7B Instruct", + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-7b-instruct", + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen2.5 7B Instruct", - "slug": "qwen/qwen-2.5-7b-instruct", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 32768, - "created_at": "2025-02-01T11:45:11.997326+00:00", - "default_parameters": {}, + "context_length": 262144, + "created_at": "2026-02-04T00:15:01.820167+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", + "description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment.\n\nThe model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 32768, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -152778,7 +150680,6 @@ "training": false }, "features": { - "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -152788,7 +150689,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "420ebd09-e91c-484e-98cf-3d6c9d43e73c", + "id": "a155e340-bea9-495a-afd3-b6c3ce442b64", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -152802,44 +150703,52 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2025-02-01T11:45:11.997326+00:00", - "default_parameters": {}, + "context_length": 262144, + "created_at": "2026-02-04T00:15:01.820167+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.95 + }, "default_stops": [], "default_system": null, - "description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.", - "features": {}, + "description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per token, delivering performance comparable to models with 10 to 20x higher active compute, which makes it well suited for cost-sensitive, always-on agent deployment.\n\nThe model is trained with a strong agentic focus and performs reliably on long-horizon coding tasks, complex tool usage, and recovery from execution failures. With a native 256k context window, it integrates cleanly into real-world CLI and IDE environments and adapts well to common agent scaffolds used by modern coding tools. The model operates exclusively in non-thinking mode and does not emit blocks, simplifying integration for production coding agents.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-Next", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "Qwen: Qwen3 Coder Next", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-coder-next-2025-02-03", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Coder Next", + "slug": "qwen/qwen3-coder-next", + "updated_at": "2026-02-04T00:27:00.409072+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen2.5-vl-72b-instruct", - "model_variant_slug": "qwen/qwen2.5-vl-72b-instruct", + "model_variant_permaslug": "qwen/qwen3-coder-next-2025-02-03", + "model_variant_slug": "qwen/qwen3-coder-next", "moderation_required": false, - "name": "Together | qwen/qwen2.5-vl-72b-instruct", + "name": "Together | qwen/qwen3-coder-next-2025-02-03", "pricing": { - "completion": "0.000008", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000195", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, "provider_display_name": "Together", "provider_info": { @@ -152992,7 +150901,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -153002,7 +150912,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "Qwen/Qwen2.5-VL-72B-Instruct", + "provider_model_id": "Qwen/Qwen3-Coder-Next-FP8", "provider_name": "Together", "provider_region": null, "provider_slug": "together", @@ -153017,45 +150927,62 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tool_choice", + "tools", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, - "features": {}, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-VL-72B-Instruct", + "hf_slug": "Qwen/Qwen3-Coder-Next", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen2.5 VL 72B Instruct", + "name": "Qwen: Qwen3 Coder Next", "output_modalities": ["text"], - "permaslug": "qwen/qwen2.5-vl-72b-instruct", - "reasoning_config": null, + "permaslug": "qwen/qwen3-coder-next-2025-02-03", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "Qwen2.5 VL 72B Instruct", - "slug": "qwen/qwen2.5-vl-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Coder Next", + "slug": "qwen/qwen3-coder-next", + "updated_at": "2026-02-04T00:27:00.409072+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 40960, - "created_at": "2025-04-28T21:29:17.25671+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "context_length": 262144, + "created_at": "2025-09-11T17:38:04.192907+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 40960, + "context_length": 262144, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -153064,7 +150991,10 @@ "training": false }, "features": { - "supported_parameters": {}, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -153074,7 +151004,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "171c8c1d-75ea-415c-8cc1-1fda69234e8d", + "id": "5d74eb7f-f050-48e9-904d-4922e4299317", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -153088,52 +151018,54 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 131072, - "created_at": "2025-04-28T21:29:17.25671+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "context_length": 262144, + "created_at": "2025-09-11T17:38:04.192907+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", + "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-04-28", + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B", - "slug": "qwen/qwen3-235b-a22b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-04-28", - "model_variant_slug": "qwen/qwen3-235b-a22b", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", "moderation_required": false, - "name": "Together | qwen/qwen3-235b-a22b-04-28", + "name": "Together | qwen/qwen3-next-80b-a3b-thinking-2509", "pricing": { - "completion": "0.0000006", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.00000015" }, "provider_display_name": "Together", "provider_info": { @@ -153286,7 +151218,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -153296,14 +151229,16 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-fp8-tput", + "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking", "provider_name": "Together", "provider_region": null, - "provider_slug": "together/fp8", - "quantization": "fp8", + "provider_slug": "together", + "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -153313,49 +151248,58 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p" + "min_p", + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "qwen3", + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B", + "name": "Qwen: Qwen3 Next 80B A3B Thinking", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-04-28", + "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", "reasoning_config": { "end_token": "", - "start_token": "" + "start_token": "", + "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B", - "slug": "qwen/qwen3-235b-a22b", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Qwen3 Next 80B A3B Thinking", + "slug": "qwen/qwen3-next-80b-a3b-thinking", + "updated_at": "2026-01-08T19:23:52.555156+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "created_at": "2025-10-23T14:55:32.53917+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Qwen3-VL-32B-Instruct is a large-scale multimodal vision-language model designed for high-precision understanding and reasoning across text, images, and video. With 32 billion parameters, it combines deep visual perception with advanced text comprehension, enabling fine-grained spatial reasoning, document and scene analysis, and long-horizon video understanding.Robust OCR in 32 languages, and enhanced multimodal fusion through Interleaved-MRoPE and DeepStack architectures. Optimized for agentic interaction and visual tool use, Qwen3-VL-32B delivers state-of-the-art performance for complex real-world multimodal tasks.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, @@ -153377,7 +151321,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "48da8dd9-87ca-4faf-85ff-a62b3a313a35", + "id": "cc9ec262-af0e-4a84-9ce3-218a517f85b3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -153392,11 +151336,15 @@ "model": { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-21T17:39:15.880992+00:00", - "default_parameters": {}, + "created_at": "2025-10-23T14:55:32.53917+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following, logical reasoning, math, code, and tool usage. The model supports a native 262K context length and does not implement \"thinking mode\" ( blocks).\n\nCompared to its base variant, this version delivers significant gains in knowledge coverage, long-context reasoning, coding benchmarks, and alignment with open-ended tasks. It is particularly strong on multilingual understanding, math reasoning (e.g., AIME, HMMT), and alignment evaluations like Arena-Hard and WritingBench.", + "description": "Qwen3-VL-32B-Instruct is a large-scale multimodal vision-language model designed for high-precision understanding and reasoning across text, images, and video. With 32 billion parameters, it combines deep visual perception with advanced text comprehension, enabling fine-grained spatial reasoning, document and scene analysis, and long-horizon video understanding.Robust OCR in 32 languages, and enhanced multimodal fusion through Interleaved-MRoPE and DeepStack architectures. Optimized for agentic interaction and visual tool use, Qwen3-VL-32B delivers state-of-the-art performance for complex real-world multimodal tasks.", "features": { "reasoning_config": { "end_token": null, @@ -153404,41 +151352,36 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Qwen/Qwen3-VL-32B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Qwen: Qwen3 VL 32B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "qwen/qwen3-vl-32b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Qwen3 VL 32B Instruct", + "slug": "qwen/qwen3-vl-32b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-07-25", - "model_variant_slug": "qwen/qwen3-235b-a22b-2507", + "model_variant_permaslug": "qwen/qwen3-vl-32b-instruct", + "model_variant_slug": "qwen/qwen3-vl-32b-instruct", "moderation_required": false, - "name": "Together | qwen/qwen3-235b-a22b-07-25", + "name": "Together | qwen/qwen3-vl-32b-instruct", "pricing": { - "completion": "0.0000006", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000005" }, "provider_display_name": "Together", "provider_info": { @@ -153591,7 +151534,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -153601,7 +151545,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Instruct-2507-tput", + "provider_model_id": "Qwen/Qwen3-VL-32B-Instruct", "provider_name": "Together", "provider_region": null, "provider_slug": "together", @@ -153617,12 +151561,12 @@ "repetition_penalty", "logit_bias", "min_p", - "tools", - "tool_choice" + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, @@ -153633,40 +151577,40 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "hf_slug": "Qwen/Qwen3-VL-32B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Instruct 2507", + "name": "Qwen: Qwen3 VL 32B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-07-25", + "permaslug": "qwen/qwen3-vl-32b-instruct", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Instruct 2507", - "slug": "qwen/qwen3-235b-a22b-2507", + "short_name": "Qwen3 VL 32B Instruct", + "slug": "qwen/qwen3-vl-32b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", "context_length": 262144, - "created_at": "2025-07-25T13:19:17.179049+00:00", + "created_at": "2025-10-14T17:35:08.402158+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, @@ -153688,7 +151632,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "638a3108-ef85-47db-9845-7cc29e9fe42b", + "id": "9058e91e-03a5-45eb-a678-30b53308f21a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -153702,59 +151646,53 @@ "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-25T13:19:17.179049+00:00", + "context_length": 256000, + "created_at": "2025-10-14T17:35:08.402158+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 0.7, + "top_p": 0.8 }, - "default_stops": ["<|im_start|>", "<|im_end|>"], + "default_stops": [], "default_system": null, - "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", + "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["image", "text"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "Qwen: Qwen3 VL 8B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "permaslug": "qwen/qwen3-vl-8b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "Qwen3 VL 8B Instruct", + "slug": "qwen/qwen3-vl-8b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-235b-a22b-thinking-2507", - "model_variant_slug": "qwen/qwen3-235b-a22b-thinking-2507", + "model_variant_permaslug": "qwen/qwen3-vl-8b-instruct", + "model_variant_slug": "qwen/qwen3-vl-8b-instruct", "moderation_required": false, - "name": "Together | qwen/qwen3-235b-a22b-thinking-2507", + "name": "Together | qwen/qwen3-vl-8b-instruct", "pricing": { - "completion": "0.000003", + "completion": "0.00000068", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000065", - "request": "0", - "web_search": "0" + "prompt": "0.00000018" }, "provider_display_name": "Together", "provider_info": { @@ -153907,7 +151845,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -153917,14 +151856,12 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "provider_model_id": "Qwen/Qwen3-VL-8B-Instruct", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ - "reasoning", - "include_reasoning", "max_tokens", "temperature", "top_p", @@ -153935,57 +151872,56 @@ "repetition_penalty", "logit_bias", "min_p", - "tools", - "tool_choice" + "structured_outputs", + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, - "supports_tool_parameters": true, + "supports_reasoning": false, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { - "chat_template_config": {}, "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, "group": "Qwen3", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "qwen3", + "input_modalities": ["image", "text"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 235B A22B Thinking 2507", + "name": "Qwen: Qwen3 VL 8B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-235b-a22b-thinking-2507", + "permaslug": "qwen/qwen3-vl-8b-instruct", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 235B A22B Thinking 2507", - "slug": "qwen/qwen3-235b-a22b-thinking-2507", - "updated_at": "2026-01-08T20:02:38.719902+00:00", + "short_name": "Qwen3 VL 8B Instruct", + "slug": "qwen/qwen3-vl-8b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "qwen", - "context_length": 262144, - "created_at": "2025-07-23T00:29:06+00:00", + "context_length": 131072, + "created_at": "2024-09-19T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -154003,7 +151939,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "443141dd-d524-4980-bcd8-097b34ec0025", + "id": "dd0532ec-47ac-4d9a-9bbd-cc30a98c260a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -154012,59 +151948,44 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": null, + "max_completion_tokens": 2048, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "qwen", - "context_length": 1048576, - "created_at": "2025-07-23T00:29:06+00:00", + "context_length": 131072, + "created_at": "2024-09-19T00:00:00+00:00", "default_parameters": {}, - "default_stops": [], + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], "default_system": null, - "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen2.5-72B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen2.5 72B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "qwen/qwen-2.5-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Qwen2.5 72B Instruct", + "slug": "qwen/qwen-2.5-72b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "model_variant_slug": "qwen/qwen3-coder", + "model_variant_permaslug": "qwen/qwen-2.5-72b-instruct", + "model_variant_slug": "qwen/qwen-2.5-72b-instruct", "moderation_required": false, - "name": "Together | qwen/qwen3-coder-480b-a35b-07-25", + "name": "Together | qwen/qwen-2.5-72b-instruct", "pricing": { - "completion": "0.000002", + "completion": "0.0000012", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000012" }, "provider_display_name": "Together", "provider_info": { @@ -154217,7 +152138,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -154227,7 +152149,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "provider_model_id": "Qwen/Qwen2.5-72B-Instruct-Turbo", "provider_name": "Together", "provider_region": null, "provider_slug": "together/fp8", @@ -154242,61 +152164,49 @@ "top_k", "repetition_penalty", "logit_bias", - "min_p", - "tools", - "tool_choice" + "min_p" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, - "features": { - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - } - }, - "group": "Qwen3", + "features": {}, + "group": "Qwen", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "hf_slug": "Qwen/Qwen2.5-72B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen: Qwen3 Coder 480B A35B", + "name": "Qwen2.5 72B Instruct", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", - "reasoning_config": { - "end_token": null, - "start_token": null, - "system_prompt": null - }, + "permaslug": "qwen/qwen-2.5-72b-instruct", + "reasoning_config": null, "router": null, - "short_name": "Qwen3 Coder 480B A35B", - "slug": "qwen/qwen3-coder", + "short_name": "Qwen2.5 72B Instruct", + "slug": "qwen/qwen-2.5-72b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:38:04.192907+00:00", + "author": "z-ai", + "context_length": 202752, + "created_at": "2025-09-30T12:32:56.306946+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, + "temperature": 0.6, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 202752, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -154305,10 +152215,7 @@ "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -154318,7 +152225,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "5d74eb7f-f050-48e9-904d-4922e4299317", + "id": "227db9b5-e0d2-4053-9b23-fb3ad58e14ac", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -154331,17 +152238,17 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-09-11T17:38:04.192907+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-09-30T12:32:56.306946+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, + "temperature": 0.6, "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic planning, and reports strong results across knowledge, reasoning, coding, alignment, and multilingual evaluations. Compared with prior Qwen3 variants, it emphasizes stability under long chains of thought and efficient scaling during inference, and it is tuned to follow complex instructions while reducing repetitive or off-task behavior.\n\nThe model is suitable for agent frameworks and tool use (function calling), retrieval-heavy workflows, and standardized benchmarking where step-by-step solutions are required. It supports long, detailed completions and leverages throughput-oriented techniques (e.g., multi-token prediction) for faster generation. Note that it operates in thinking-only mode.", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -154350,41 +152257,36 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", - "model_variant_slug": "qwen/qwen3-next-80b-a3b-thinking", + "model_variant_permaslug": "z-ai/glm-4.6", + "model_variant_slug": "z-ai/glm-4.6", "moderation_required": false, - "name": "Together | qwen/qwen3-next-80b-a3b-thinking-2509", + "name": "Together | z-ai/glm-4.6", "pricing": { - "completion": "0.0000015", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Together", "provider_info": { @@ -154537,7 +152439,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -154547,7 +152450,7 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "provider_model_id": "zai-org/GLM-4.6", "provider_name": "Together", "provider_region": null, "provider_slug": "together", @@ -154555,8 +152458,6 @@ "supported_parameters": [ "reasoning", "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", @@ -154568,7 +152469,9 @@ "logit_bias", "min_p", "tools", - "tool_choice" + "tool_choice", + "structured_outputs", + "response_format" ], "supports_multipart": true, "supports_reasoning": true, @@ -154584,44 +152487,44 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "hf_slug": null, "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 Next 80B A3B Thinking", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-next-80b-a3b-thinking-2509", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 Next 80B A3B Thinking", - "slug": "qwen/qwen3-next-80b-a3b-thinking", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-23T14:55:32.53917+00:00", + "author": "z-ai", + "context_length": 202752, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-32B-Instruct is a large-scale multimodal vision-language model designed for high-precision understanding and reasoning across text, images, and video. With 32 billion parameters, it combines deep visual perception with advanced text comprehension, enabling fine-grained spatial reasoning, document and scene analysis, and long-horizon video understanding.Robust OCR in 32 languages, and enhanced multimodal fusion through Interleaved-MRoPE and DeepStack architectures. Optimized for agentic interaction and visual tool use, Qwen3-VL-32B delivers state-of-the-art performance for complex real-world multimodal tasks.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { "adapter_name": "TogetherAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 202752, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://www.together.ai/privacy", @@ -154630,6 +152533,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -154639,7 +152543,7 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "cc9ec262-af0e-4a84-9ce3-218a517f85b3", + "id": "dfd97543-2c60-42df-92fb-9b3890e5e800", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -154652,59 +152556,55 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-23T14:55:32.53917+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-32B-Instruct is a large-scale multimodal vision-language model designed for high-precision understanding and reasoning across text, images, and video. With 32 billion parameters, it combines deep visual perception with advanced text comprehension, enabling fine-grained spatial reasoning, document and scene analysis, and long-horizon video understanding.Robust OCR in 32 languages, and enhanced multimodal fusion through Interleaved-MRoPE and DeepStack architectures. Optimized for agentic interaction and visual tool use, Qwen3-VL-32B delivers state-of-the-art performance for complex real-world multimodal tasks.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-32B-Instruct", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 32B Instruct", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-32b-instruct", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 32B Instruct", - "slug": "qwen/qwen3-vl-32b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-32b-instruct", - "model_variant_slug": "qwen/qwen3-vl-32b-instruct", + "model_variant_permaslug": "z-ai/glm-4.7-20251222", + "model_variant_slug": "z-ai/glm-4.7", "moderation_required": false, - "name": "Together | qwen/qwen3-vl-32b-instruct", + "name": "Together | z-ai/glm-4.7-20251222", "pricing": { - "completion": "0.0000015", + "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000005", - "request": "0", - "web_search": "0" + "prompt": "0.00000045" }, "provider_display_name": "Together", "provider_info": { @@ -154857,7 +152757,8 @@ "pangram/mistral-small-2501", "black-forest-labs/FLUX.2-pro", "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "ServiceNow-AI/Apriel-1.6-15b-Thinker", + "cartesia/sonic-3" ], "isAbortable": true, "isMultipartSupported": true, @@ -154867,12 +152768,14 @@ "slug": "together", "statusPageUrl": "https://status.together.ai/" }, - "provider_model_id": "Qwen/Qwen3-VL-32B-Instruct", + "provider_model_id": "zai-org/GLM-4.7", "provider_name": "Together", "provider_region": null, "provider_slug": "together", "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", "max_tokens", "temperature", "top_p", @@ -154887,61 +152790,76 @@ "response_format" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": false, "variable_pricings": [], "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null } }, - "group": "Qwen", + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-32B-Instruct", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 32B Instruct", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-32b-instruct", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { - "end_token": null, - "start_token": null, + "end_token": "", + "start_token": "", "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 32B Instruct", - "slug": "qwen/qwen3-vl-32b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null - }, + } + ], + "name": "Together", + "slug": "together" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "datacenters": [], + "displayName": "Upstage", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://upstage.ai&size=256" + }, + "models": [ { - "author": "qwen", - "context_length": 262144, - "created_at": "2025-10-14T17:35:08.402158+00:00", + "author": "upstage", + "context_length": 128000, + "created_at": "2026-01-27T02:33:20.601032+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": null, + "top_p": null }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", + "description": "Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized for Korean with English and Japanese support.", "endpoint": { - "adapter_name": "TogetherAdapter", + "adapter_name": "OpenAIAdapter", "can_abort": true, - "context_length": 262144, + "context_length": 128000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.together.ai/privacy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.together.ai/terms-of-service", "training": false }, "features": { @@ -154954,7 +152872,547 @@ }, "has_chat_completions": true, "has_completions": true, - "id": "9058e91e-03a5-45eb-a678-30b53308f21a", + "id": "ca4afae5-6d59-4c6b-a190-4a6376f04e34", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "upstage", + "context_length": 128000, + "created_at": "2026-01-27T02:33:20.601032+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized for Korean with English and Japanese support.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Upstage: Solar Pro 3", + "output_modalities": ["text"], + "permaslug": "upstage/solar-pro-3", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Solar Pro 3", + "slug": "upstage/solar-pro-3", + "updated_at": "2026-01-27T02:54:55.905279+00:00", + "warning_message": null + }, + "model_variant_permaslug": "upstage/solar-pro-3:free", + "model_variant_slug": "upstage/solar-pro-3:free", + "moderation_required": false, + "name": "Upstage | upstage/solar-pro-3:free", + "pricing": { + "completion": "0", + "discount": 0, + "prompt": "0" + }, + "provider_display_name": "Upstage", + "provider_info": { + "adapterName": "OpenAIAdapter", + "baseUrl": "https://api.upstage.ai/v1", + "byokEnabled": false, + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Upstage", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": true, + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://upstage.ai&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Upstage", + "owners": [], + "slug": "upstage", + "statusPageUrl": null + }, + "provider_model_id": "solar-pro3", + "provider_name": "Upstage", + "provider_region": null, + "provider_slug": "upstage", + "quantization": "unknown", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "temperature", + "max_tokens", + "tool_choice", + "tools", + "structured_outputs", + "response_format" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "free" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": null, + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Upstage: Solar Pro 3 (free)", + "output_modalities": ["text"], + "permaslug": "upstage/solar-pro-3", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Solar Pro 3 (free)", + "slug": "upstage/solar-pro-3", + "updated_at": "2026-01-27T02:54:55.905279+00:00", + "warning_message": null + } + ], + "name": "Upstage", + "slug": "upstage" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": false, + "training": false + }, + "displayName": "Venice", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" + }, + "models": [ + { + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-09-25T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_system": null, + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "endpoint": { + "adapter_name": "VeniceAdapter", + "can_abort": true, + "context_length": 131072, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "features": { + "disable_free_endpoint_limits": true, + "supported_parameters": { + "response_format": false + }, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "e8440b11-29fb-4887-a222-eff9ba33dfbf", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": 8, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-09-25T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_system": null, + "description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it supports eight languages, including English, Spanish, and Hindi, and is adaptable for additional languages.\n\nTrained on 9 trillion tokens, the Llama 3.2 3B model excels in instruction-following, complex reasoning, and tool use. Its balanced performance makes it ideal for applications needing accuracy and efficiency in text generation across multilingual settings.\n\nClick here for the [original model card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/MODEL_CARD.md).\n\nUsage of this model is subject to [Meta's Acceptable Use Policy](https://www.llama.com/llama3/use-policy/).", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": null, + "name": "Meta: Llama 3.2 3B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.2-3b-instruct", + "reasoning_config": null, + "router": null, + "short_name": "Llama 3.2 3B Instruct", + "slug": "meta-llama/llama-3.2-3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "meta-llama/llama-3.2-3b-instruct:free", + "model_variant_slug": "meta-llama/llama-3.2-3b-instruct:free", + "moderation_required": false, + "name": "Venice | meta-llama/llama-3.2-3b-instruct:free", + "pricing": { + "completion": "0", + "discount": 0, + "prompt": "0" + }, + "provider_display_name": "Venice", + "provider_info": { + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "displayName": "Venice", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": false, + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" + }, + "ignoredProviderModels": [ + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Venice", + "owners": ["{}"], + "slug": "venice", + "statusPageUrl": null + }, + "provider_model_id": "llama-3.2-3b", + "provider_name": "Venice", + "provider_region": null, + "provider_slug": "venice/fp16", + "quantization": "fp16", + "supported_parameters": [ + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "top_k" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": false, + "variable_pricings": [], + "variant": "free" + }, + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.2-3B-Instruct", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": null, + "name": "Meta: Llama 3.2 3B Instruct (free)", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.2-3b-instruct", + "reasoning_config": null, + "router": null, + "short_name": "Llama 3.2 3B Instruct (free)", + "slug": "meta-llama/llama-3.2-3b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + { + "author": "meta-llama", + "context_length": 65536, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_system": null, + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "endpoint": { + "adapter_name": "VeniceAdapter", + "can_abort": true, + "context_length": 65536, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "features": { + "disable_free_endpoint_limits": true, + "supported_parameters": { + "response_format": false, + "structured_outputs": false + }, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "839b2e30-a1b4-4974-b980-3e534b5873b1", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": 8, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_system": null, + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, + "router": null, + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct:free", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct:free", + "moderation_required": false, + "name": "Venice | meta-llama/llama-3.3-70b-instruct:free", + "pricing": { + "completion": "0", + "discount": 0, + "prompt": "0" + }, + "provider_display_name": "Venice", + "provider_info": { + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "displayName": "Venice", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": false, + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" + }, + "ignoredProviderModels": [ + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Venice", + "owners": ["{}"], + "slug": "venice", + "statusPageUrl": null + }, + "provider_model_id": "llama-3.3-70b", + "provider_name": "Venice", + "provider_region": null, + "provider_slug": "venice/fp8", + "quantization": "fp8", + "supported_parameters": [ + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "top_k", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "free" + }, + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct (free)", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, + "router": null, + "short_name": "Llama 3.3 70B Instruct (free)", + "slug": "meta-llama/llama-3.3-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + { + "author": "minimax", + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 1, + "top_p": 0.9 + }, + "default_stops": [], + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "endpoint": { + "adapter_name": "VeniceAdapter", + "can_abort": true, + "context_length": 204800, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "features": { + "reasoning_return_mechanism": "reasoning-content", + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "1c0609fd-acb8-4678-a0c2-21a894cf893a", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -154967,17 +153425,233 @@ "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 256000, - "created_at": "2025-10-14T17:35:08.402158+00:00", + "author": "minimax", + "context_length": 204800, + "created_at": "2025-12-23T01:56:37+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.7, - "top_p": 0.8 + "temperature": 1, + "top_p": 0.9 + }, + "default_stops": [], + "default_system": "You are MiniMax-M2.1, a helpful AI assistant built by MiniMax. Knowledge cutoff: 2025-06.", + "description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).", + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "MiniMaxAI/MiniMax-M2.1", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "MiniMax: MiniMax M2.1", + "output_modalities": ["text"], + "permaslug": "minimax/minimax-m2.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", + "warning_message": null + }, + "model_variant_permaslug": "minimax/minimax-m2.1", + "model_variant_slug": "minimax/minimax-m2.1", + "moderation_required": false, + "name": "Venice | minimax/minimax-m2.1", + "pricing": { + "completion": "0.0000016", + "discount": 0, + "input_cache_read": "0.00000004", + "prompt": "0.0000004" + }, + "provider_display_name": "Venice", + "provider_info": { + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "displayName": "Venice", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": false, + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" + }, + "ignoredProviderModels": [ + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Venice", + "owners": ["{}"], + "slug": "venice", + "statusPageUrl": null + }, + "provider_model_id": "minimax-m21", + "provider_name": "Venice", + "provider_region": null, + "provider_slug": "venice", + "quantization": "unknown", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "top_k", + "response_format", + "structured_outputs", + "tools", + "tool_choice", + "logprobs", + "top_logprobs" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": { + "should_hoist_and_merge_system_messages": true + }, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "MiniMaxAI/MiniMax-M2.1", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "MiniMax: MiniMax M2.1", + "output_modalities": ["text"], + "permaslug": "minimax/minimax-m2.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "MiniMax M2.1", + "slug": "minimax/minimax-m2.1", + "updated_at": "2026-01-08T19:23:52.555156+00:00", + "warning_message": null + }, + { + "author": "mistralai", + "context_length": 128000, + "created_at": "2025-03-17T19:15:37.00423+00:00", + "default_parameters": { + "temperature": 0.3 + }, + "default_stops": [], + "default_system": null, + "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", + "endpoint": { + "adapter_name": "VeniceAdapter", + "can_abort": true, + "context_length": 128000, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "features": { + "disable_free_endpoint_limits": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "ecbdc9f1-ecca-4f91-83cf-b3495a60e874", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": 8, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "mistralai", + "context_length": 128000, + "created_at": "2025-03-17T19:15:37.00423+00:00", + "default_parameters": { + "temperature": 0.3 }, "default_stops": [], "default_system": null, - "description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon temporal reasoning, DeepStack for fine-grained visual-text alignment, and text-timestamp alignment for precise event localization.\n\nThe model supports a native 256K-token context window, extensible to 1M tokens, and handles both static and dynamic media inputs for tasks like document parsing, visual question answering, spatial reasoning, and GUI control. It achieves text understanding comparable to leading LLMs while expanding OCR coverage to 32 languages and enhancing robustness under varied visual conditions.", + "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", "features": { "reasoning_config": { "end_token": null, @@ -154985,209 +153659,101 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", + "hf_slug": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 8B Instruct", + "name": "Mistral: Mistral Small 3.1 24B", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-8b-instruct", + "permaslug": "mistralai/mistral-small-3.1-24b-instruct-2503", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 8B Instruct", - "slug": "qwen/qwen3-vl-8b-instruct", + "short_name": "Mistral Small 3.1 24B", + "slug": "mistralai/mistral-small-3.1-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen3-vl-8b-instruct", - "model_variant_slug": "qwen/qwen3-vl-8b-instruct", + "model_variant_permaslug": "mistralai/mistral-small-3.1-24b-instruct-2503:free", + "model_variant_slug": "mistralai/mistral-small-3.1-24b-instruct:free", "moderation_required": false, - "name": "Together | qwen/qwen3-vl-8b-instruct", + "name": "Venice | mistralai/mistral-small-3.1-24b-instruct-2503:free", "pricing": { - "completion": "0.00000068", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000018", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Together", + "provider_display_name": "Venice", "provider_info": { - "adapterName": "TogetherAdapter", - "baseUrl": "https://api.together.xyz/v1", + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.together.ai/privacy", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.together.ai/terms-of-service", + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, - "displayName": "Together", + "displayName": "Venice", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.together.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" }, "ignoredProviderModels": [ - "mixedbread-ai/Mxbai-Rerank-Large-V2", - "BAAI/bge-base-en-v1.5-vllm", - "scb10x/scb10x-typhoon-2-1-gemma3-12b", - "arcee-ai/AFM-4.5B-Preview", - "deepseek-ai/DeepSeek-R1-0528-tput", - "black-forest-labs/FLUX.1-kontext-dev", - "google/gemma-3-27b-it", - "Qwen/Qwen3-32B-FP8", - "openai/whisper-large-v3", - "eddie/Qwen3-32B", - "eddiehou/meta-llama/Llama-3.1-405B", - "yan/deepseek-ai-deepseek-v3", - "moz/Llama-3.3-70B-Instruct-Turbo", - "serverless-qwen-qwen3-32b-fp8", - "qwen-qwen3-32b-fp8-serverless", - "moz-llama-3-3-70b-instruct-turbo", - "moonshotai/Kimi-K2-Instruct-tgl-testing", - "VirtueAIxTogether/VirtueGuard-Text-Lite", - "Virtue-AI/VirtueGuard-Text-Lite", - "black-forest-labs/FLUX.1-krea-dev", - "prosus/qwen-qwen3-32b-fp8-long-context", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-batch", - "deepseek-ai/DeepSeek-R1-DE", - "arize-ai/qwen-2-1.5b-instruct", - "openai/gpt-oss-120b", - "meta-llama/Llama-3-70b-hf", - "Qwen/Qwen2.5-72B-Instruct", - "meta-llama/Meta-Llama-3.1-70B-Instruct-Reference", - "meta-llama/Llama-3.2-1B-Instruct", - "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - "meta-llama/Meta-Llama-3-8B-Instruct", - "meta-llama/Llama-3.1-405B-Instruct", - "cartesia/sonic", - "cartesia/sonic-2", - "togethercomputer/MoA-1", - "Salesforce/Llama-Rank-V1", - "black-forest-labs/FLUX.1-schnell", - "lgai/exaone-3-5-32b-instruct", - "lgai/exaone-deep-32b", - "black-forest-labs/FLUX.1-dev", - "marin-community/marin-8b-instruct", - "togethercomputer/Refuel-Llm-V2-Small", - "meta-llama/Llama-3-70b-chat-hf", - "Alibaba-NLP/gte-modernbert-base", - "black-forest-labs/FLUX.1-pro", - "black-forest-labs/FLUX.1.1-pro", - "togethercomputer/MoA-1-Turbo", - "black-forest-labs/FLUX.1-dev-lora", - "meta-llama/Llama-2-70b-hf", - "togethercomputer/m2-bert-80M-32k-retrieval", - "togethercomputer/Refuel-Llm-V2", - "intfloat/multilingual-e5-large-instruct", - "black-forest-labs/FLUX.1-kontext-max", - "black-forest-labs/FLUX.1-schnell-Free", - "black-forest-labs/FLUX.1-kontext-pro", - "BAAI/bge-large-en-v1.5", - "BAAI/bge-base-en-v1.5", - "Qwen/Qwen3-Next-80B-A3B-Instruct", - "ServiceNow-AI/Apriel-1.5-15b-Thinker", - "openai/whisper-large-v3-test", - "codellama/CodeLlama-34b-Instruct-hf", - "meta-llama/Llama-2-13b-chat-hf", - "togethercomputer/m2-bert-80M-8k-retrieval", - "WhereIsAI/UAE-Large-V1", - "meta-llama/Meta-Llama-3.1-405B-Instruct-Lite-Pro", - "togethercomputer/m2-bert-80M-2k-retrieval", - "meta-llama/Meta-Llama-3-70B-Instruct-Lite", - "Meta-Llama/Llama-Guard-7b", - "Gryphe/MythoMax-L2-13b-Lite", - "upstage/SOLAR-10.7B-Instruct-v1.0", - "Qwen/Qwen2.5-14B-Instruct", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free", - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free", - "zai-org/GLM-4.5-Air-FP8", - "kwaivgI/kling-1.6-standard", - "HiDream-ai/HiDream-I1-Full", - "ByteDance-Seed/Seedream-3.0", - "ByteDance-Seed/Seedream-4.0", - "Lykon/DreamShaper", - "HiDream-ai/HiDream-I1-Dev", - "Qwen/Qwen-Image", - "RunDiffusion/Juggernaut-pro-flux", - "google/imagen-4.0-preview", - "google/imagen-4.0-ultra", - "google/veo-3.0", - "minimax/hailuo-02", - "stabilityai/stable-diffusion-3-medium", - "black-forest-labs/FLUX.1-Canny-pro", - "google/imagen-4.0-fast", - "minimax/video-01-director", - "HiDream-ai/HiDream-I1-Fast", - "Wan-AI/Wan2.2-T2V-A14B", - "ByteDance/Seedance-1.0-pro", - "google/veo-3.0-fast-audio", - "vidu/vidu-q1", - "kwaivgI/kling-2.1-master", - "google/veo-3.0-audio", - "Rundiffusion/Juggernaut-Lightning-Flux", - "Wan-AI/Wan2.2-I2V-A14B", - "google/flash-image-2.5", - "google/veo-2.0", - "openai/sora-2", - "google/veo-3.0-fast", - "ideogram/ideogram-3.0", - "kwaivgI/kling-2.0-master", - "kwaivgI/kling-2.1-standard", - "pixverse/pixverse-v5", - "stabilityai/stable-diffusion-xl-base-1.0", - "openai/sora-2-pro", - "ByteDance/Seedance-1.0-lite", - "kwaivgI/kling-1.6-pro", - "vidu/vidu-2.0", - "kwaivgI/kling-2.1-pro", - "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "canopylabs/orpheus-3b-0.1-ft", - "hexgrad/Kokoro-82M", - "eddie/gemma-2b-it", - "mistralai/Voxtral-Mini-3B-2507", - "arcee-ai/coder-large", - "arcee-ai/virtuoso-large", - "arcee-ai/maestro-reasoning", - "deepcogito/cogito-v2-1-671b", - "arcee_ai/arcee-spotlight", - "google/gemini-3-pro-image", - "mercor/cwm", - "black-forest-labs/FLUX.2-flex", - "keith-aditya/kimi-k2-instruct", - "pangram/mistral-small-2501", - "black-forest-labs/FLUX.2-pro", - "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Together", + "name": "Venice", "owners": ["{}"], - "slug": "together", - "statusPageUrl": "https://status.together.ai/" + "slug": "venice", + "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen3-VL-8B-Instruct", - "provider_name": "Together", + "provider_model_id": "mistral-31-24b", + "provider_name": "Venice", "provider_region": null, - "provider_slug": "together", - "quantization": "unknown", + "provider_slug": "venice/fp8", + "quantization": "fp8", "supported_parameters": [ + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -155195,17 +153761,14 @@ "frequency_penalty", "presence_penalty", "top_k", - "repetition_penalty", - "logit_bias", - "min_p", - "structured_outputs", - "response_format" + "tools", + "tool_choice" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": false, + "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { "reasoning_config": { @@ -155214,48 +153777,53 @@ "system_prompt": null } }, - "group": "Qwen3", + "group": "Mistral", "has_text_output": true, - "hf_slug": "Qwen/Qwen3-VL-8B-Instruct", + "hf_slug": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Qwen: Qwen3 VL 8B Instruct", + "name": "Mistral: Mistral Small 3.1 24B (free)", "output_modalities": ["text"], - "permaslug": "qwen/qwen3-vl-8b-instruct", + "permaslug": "mistralai/mistral-small-3.1-24b-instruct-2503", "reasoning_config": { "end_token": null, "start_token": null, "system_prompt": null }, "router": null, - "short_name": "Qwen3 VL 8B Instruct", - "slug": "qwen/qwen3-vl-8b-instruct", + "short_name": "Mistral Small 3.1 24B (free)", + "slug": "mistralai/mistral-small-3.1-24b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "qwen", - "context_length": 131072, - "created_at": "2024-09-19T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", "endpoint": { - "adapter_name": "TogetherAdapter", + "adapter_name": "VeniceAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.together.ai/privacy", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.together.ai/terms-of-service", + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -155264,8 +153832,8 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "dd0532ec-47ac-4d9a-9bbd-cc30a98c260a", + "has_completions": false, + "id": "c884a526-d529-4263-bd90-a58ea25e99d1", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -155274,215 +153842,307 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 2048, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "qwen", - "context_length": 131072, - "created_at": "2024-09-19T00:00:00+00:00", - "default_parameters": {}, - "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "author": "moonshotai", + "context_length": 262144, + "created_at": "2026-01-27T04:11:16+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], "default_system": null, - "description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2:\n\n- Significantly more knowledge and has greatly improved capabilities in coding and mathematics, thanks to our specialized expert models in these domains.\n\n- Significant improvements in instruction following, generating long texts (over 8K tokens), understanding structured data (e.g, tables), and generating structured outputs especially JSON. More resilient to the diversity of system prompts, enhancing role-play implementation and condition-setting for chatbots.\n\n- Long-context Support up to 128K tokens and can generate up to 8K tokens.\n\n- Multilingual support for over 29 languages, including Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic, and more.\n\nUsage of this model is subject to [Tongyi Qianwen LICENSE AGREEMENT](https://huggingface.co/Qwen/Qwen1.5-110B-Chat/blob/main/LICENSE).", - "features": {}, - "group": "Qwen", + "description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed visual and text tokens, it delivers strong performance in general reasoning, visual coding, and agentic tool-calling.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-72B-Instruct", + "hf_slug": "moonshotai/Kimi-K2.5", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], - "instruct_type": "chatml", + "input_modalities": ["text", "image"], + "instruct_type": null, "model_version_group_id": null, - "name": "Qwen2.5 72B Instruct", + "name": "MoonshotAI: Kimi K2.5", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-72b-instruct", - "reasoning_config": null, + "permaslug": "moonshotai/kimi-k2.5-0127", + "reasoning_config": { + "end_token": null, + "start_token": null + }, "router": null, - "short_name": "Qwen2.5 72B Instruct", - "slug": "qwen/qwen-2.5-72b-instruct", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", "warning_message": null }, - "model_variant_permaslug": "qwen/qwen-2.5-72b-instruct", - "model_variant_slug": "qwen/qwen-2.5-72b-instruct", + "model_variant_permaslug": "moonshotai/kimi-k2.5-0127", + "model_variant_slug": "moonshotai/kimi-k2.5", "moderation_required": false, - "name": "Together | qwen/qwen-2.5-72b-instruct", + "name": "Venice | moonshotai/kimi-k2.5-0127", "pricing": { - "completion": "0.0000012", + "completion": "0.00000375", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000012", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000000125", + "prompt": "0.00000075" }, - "provider_display_name": "Together", + "provider_display_name": "Venice", "provider_info": { - "adapterName": "TogetherAdapter", - "baseUrl": "https://api.together.xyz/v1", + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.together.ai/privacy", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.together.ai/terms-of-service", + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, - "displayName": "Together", + "displayName": "Venice", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.together.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" }, "ignoredProviderModels": [ - "mixedbread-ai/Mxbai-Rerank-Large-V2", - "BAAI/bge-base-en-v1.5-vllm", - "scb10x/scb10x-typhoon-2-1-gemma3-12b", - "arcee-ai/AFM-4.5B-Preview", - "deepseek-ai/DeepSeek-R1-0528-tput", - "black-forest-labs/FLUX.1-kontext-dev", - "google/gemma-3-27b-it", - "Qwen/Qwen3-32B-FP8", - "openai/whisper-large-v3", - "eddie/Qwen3-32B", - "eddiehou/meta-llama/Llama-3.1-405B", - "yan/deepseek-ai-deepseek-v3", - "moz/Llama-3.3-70B-Instruct-Turbo", - "serverless-qwen-qwen3-32b-fp8", - "qwen-qwen3-32b-fp8-serverless", - "moz-llama-3-3-70b-instruct-turbo", - "moonshotai/Kimi-K2-Instruct-tgl-testing", - "VirtueAIxTogether/VirtueGuard-Text-Lite", - "Virtue-AI/VirtueGuard-Text-Lite", - "black-forest-labs/FLUX.1-krea-dev", - "prosus/qwen-qwen3-32b-fp8-long-context", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-batch", - "deepseek-ai/DeepSeek-R1-DE", - "arize-ai/qwen-2-1.5b-instruct", - "openai/gpt-oss-120b", - "meta-llama/Llama-3-70b-hf", - "Qwen/Qwen2.5-72B-Instruct", - "meta-llama/Meta-Llama-3.1-70B-Instruct-Reference", - "meta-llama/Llama-3.2-1B-Instruct", - "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - "meta-llama/Meta-Llama-3-8B-Instruct", - "meta-llama/Llama-3.1-405B-Instruct", - "cartesia/sonic", - "cartesia/sonic-2", - "togethercomputer/MoA-1", - "Salesforce/Llama-Rank-V1", - "black-forest-labs/FLUX.1-schnell", - "lgai/exaone-3-5-32b-instruct", - "lgai/exaone-deep-32b", - "black-forest-labs/FLUX.1-dev", - "marin-community/marin-8b-instruct", - "togethercomputer/Refuel-Llm-V2-Small", - "meta-llama/Llama-3-70b-chat-hf", - "Alibaba-NLP/gte-modernbert-base", - "black-forest-labs/FLUX.1-pro", - "black-forest-labs/FLUX.1.1-pro", - "togethercomputer/MoA-1-Turbo", - "black-forest-labs/FLUX.1-dev-lora", - "meta-llama/Llama-2-70b-hf", - "togethercomputer/m2-bert-80M-32k-retrieval", - "togethercomputer/Refuel-Llm-V2", - "intfloat/multilingual-e5-large-instruct", - "black-forest-labs/FLUX.1-kontext-max", - "black-forest-labs/FLUX.1-schnell-Free", - "black-forest-labs/FLUX.1-kontext-pro", - "BAAI/bge-large-en-v1.5", - "BAAI/bge-base-en-v1.5", - "Qwen/Qwen3-Next-80B-A3B-Instruct", - "ServiceNow-AI/Apriel-1.5-15b-Thinker", - "openai/whisper-large-v3-test", - "codellama/CodeLlama-34b-Instruct-hf", - "meta-llama/Llama-2-13b-chat-hf", - "togethercomputer/m2-bert-80M-8k-retrieval", - "WhereIsAI/UAE-Large-V1", - "meta-llama/Meta-Llama-3.1-405B-Instruct-Lite-Pro", - "togethercomputer/m2-bert-80M-2k-retrieval", - "meta-llama/Meta-Llama-3-70B-Instruct-Lite", - "Meta-Llama/Llama-Guard-7b", - "Gryphe/MythoMax-L2-13b-Lite", - "upstage/SOLAR-10.7B-Instruct-v1.0", - "Qwen/Qwen2.5-14B-Instruct", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free", - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free", - "zai-org/GLM-4.5-Air-FP8", - "kwaivgI/kling-1.6-standard", - "HiDream-ai/HiDream-I1-Full", - "ByteDance-Seed/Seedream-3.0", - "ByteDance-Seed/Seedream-4.0", - "Lykon/DreamShaper", - "HiDream-ai/HiDream-I1-Dev", - "Qwen/Qwen-Image", - "RunDiffusion/Juggernaut-pro-flux", - "google/imagen-4.0-preview", - "google/imagen-4.0-ultra", - "google/veo-3.0", - "minimax/hailuo-02", - "stabilityai/stable-diffusion-3-medium", - "black-forest-labs/FLUX.1-Canny-pro", - "google/imagen-4.0-fast", - "minimax/video-01-director", - "HiDream-ai/HiDream-I1-Fast", - "Wan-AI/Wan2.2-T2V-A14B", - "ByteDance/Seedance-1.0-pro", - "google/veo-3.0-fast-audio", - "vidu/vidu-q1", - "kwaivgI/kling-2.1-master", - "google/veo-3.0-audio", - "Rundiffusion/Juggernaut-Lightning-Flux", - "Wan-AI/Wan2.2-I2V-A14B", - "google/flash-image-2.5", - "google/veo-2.0", - "openai/sora-2", - "google/veo-3.0-fast", - "ideogram/ideogram-3.0", - "kwaivgI/kling-2.0-master", - "kwaivgI/kling-2.1-standard", - "pixverse/pixverse-v5", - "stabilityai/stable-diffusion-xl-base-1.0", - "openai/sora-2-pro", - "ByteDance/Seedance-1.0-lite", - "kwaivgI/kling-1.6-pro", - "vidu/vidu-2.0", - "kwaivgI/kling-2.1-pro", - "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "canopylabs/orpheus-3b-0.1-ft", - "hexgrad/Kokoro-82M", - "eddie/gemma-2b-it", - "mistralai/Voxtral-Mini-3B-2507", - "arcee-ai/coder-large", - "arcee-ai/virtuoso-large", - "arcee-ai/maestro-reasoning", - "deepcogito/cogito-v2-1-671b", - "arcee_ai/arcee-spotlight", - "google/gemini-3-pro-image", - "mercor/cwm", - "black-forest-labs/FLUX.2-flex", - "keith-aditya/kimi-k2-instruct", - "pangram/mistral-small-2501", - "black-forest-labs/FLUX.2-pro", - "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Venice", + "owners": ["{}"], + "slug": "venice", + "statusPageUrl": null + }, + "provider_model_id": "kimi-k2-5", + "provider_name": "Venice", + "provider_region": null, + "provider_slug": "venice", + "quantization": "unknown", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "top_k", + "response_format", + "structured_outputs", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": null, + "start_token": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "moonshotai/Kimi-K2.5", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "MoonshotAI: Kimi K2.5", + "output_modalities": ["text"], + "permaslug": "moonshotai/kimi-k2.5-0127", + "reasoning_config": { + "end_token": null, + "start_token": null + }, + "router": null, + "short_name": "Kimi K2.5", + "slug": "moonshotai/kimi-k2.5", + "updated_at": "2026-01-27T13:17:31.719721+00:00", + "warning_message": null + }, + { + "author": "nousresearch", + "context_length": 131072, + "created_at": "2024-08-16T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_system": null, + "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.", + "endpoint": { + "adapter_name": "VeniceAdapter", + "can_abort": true, + "context_length": 131072, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "features": { + "disable_free_endpoint_limits": true, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "196b4f57-a8ce-493a-8248-a24505c2862d", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": true, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": 8, + "limit_rpm_cf": null, + "max_completion_tokens": null, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "nousresearch", + "context_length": 131072, + "created_at": "2024-08-16T00:00:00+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>", "<|endoftext|>"], + "default_system": null, + "description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the board.\n\nHermes 3 405B is a frontier-level, full-parameter finetune of the Llama-3.1 405B foundation model, focused on aligning LLMs to the user, with powerful steering capabilities and control given to the end user.\n\nThe Hermes 3 series builds and expands on the Hermes 2 set of capabilities, including more powerful and reliable function calling and structured output capabilities, generalist assistant capabilities, and improved code generation skills.\n\nHermes 3 is competitive, if not superior, to Llama-3.1 Instruct models at general capabilities, with varying strengths and weaknesses attributable between the two.", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "NousResearch/Hermes-3-Llama-3.1-405B", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "chatml", + "model_version_group_id": null, + "name": "Nous: Hermes 3 405B Instruct", + "output_modalities": ["text"], + "permaslug": "nousresearch/hermes-3-llama-3.1-405b", + "reasoning_config": null, + "router": null, + "short_name": "Hermes 3 405B Instruct", + "slug": "nousresearch/hermes-3-llama-3.1-405b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "nousresearch/hermes-3-llama-3.1-405b:free", + "model_variant_slug": "nousresearch/hermes-3-llama-3.1-405b:free", + "moderation_required": false, + "name": "Venice | nousresearch/hermes-3-llama-3.1-405b:free", + "pricing": { + "completion": "0", + "discount": 0, + "prompt": "0" + }, + "provider_display_name": "Venice (Beta)", + "provider_info": { + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "displayName": "Venice (Beta)", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": false, + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" + }, + "ignoredProviderModels": [ + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Together", + "name": "Venice", "owners": ["{}"], - "slug": "together", - "statusPageUrl": "https://status.together.ai/" + "slug": "venice/beta", + "statusPageUrl": null }, - "provider_model_id": "Qwen/Qwen2.5-72B-Instruct-Turbo", - "provider_name": "Together", + "provider_model_id": "hermes-3-llama-3.1-405b", + "provider_name": "Venice", "provider_region": null, - "provider_slug": "together/fp8", + "provider_slug": "venice/beta", "quantization": "fp8", "supported_parameters": [ "max_tokens", @@ -155491,60 +154151,58 @@ "stop", "frequency_penalty", "presence_penalty", - "top_k", - "repetition_penalty", - "logit_bias", - "min_p" + "top_k" ], "supports_multipart": true, "supports_reasoning": false, "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": {}, - "group": "Qwen", + "group": "Llama3", "has_text_output": true, - "hf_slug": "Qwen/Qwen2.5-72B-Instruct", + "hf_slug": "NousResearch/Hermes-3-Llama-3.1-405B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": "chatml", "model_version_group_id": null, - "name": "Qwen2.5 72B Instruct", + "name": "Nous: Hermes 3 405B Instruct (free)", "output_modalities": ["text"], - "permaslug": "qwen/qwen-2.5-72b-instruct", + "permaslug": "nousresearch/hermes-3-llama-3.1-405b", "reasoning_config": null, "router": null, - "short_name": "Qwen2.5 72B Instruct", - "slug": "qwen/qwen-2.5-72b-instruct", + "short_name": "Hermes 3 405B Instruct (free)", + "slug": "nousresearch/hermes-3-llama-3.1-405b", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "z-ai", - "context_length": 202752, - "created_at": "2025-09-30T12:32:56.306946+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 40960, + "created_at": "2025-04-30T16:38:24.032465+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "Qwen3-4B is a 4 billion parameter dense language model from the Qwen3 series, designed to support both general-purpose and reasoning-intensive tasks. It introduces a dual-mode architecture—thinking and non-thinking—allowing dynamic switching between high-precision logical reasoning and efficient dialogue generation. This makes it well-suited for multi-turn chat, instruction following, and complex agent workflows.", "endpoint": { - "adapter_name": "TogetherAdapter", + "adapter_name": "VeniceAdapter", "can_abort": true, - "context_length": 202752, + "context_length": 40960, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://www.together.ai/privacy", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.together.ai/terms-of-service", + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, "features": { + "disable_free_endpoint_limits": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -155553,244 +154211,129 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "227db9b5-e0d2-4053-9b23-fb3ad58e14ac", + "has_completions": false, + "id": "2e98edb5-b21b-455b-afb4-d5c01aad515d", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 8, "limit_rpm_cf": null, "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "z-ai", - "context_length": 200000, - "created_at": "2025-09-30T12:32:56.306946+00:00", - "default_parameters": { - "frequency_penalty": null, - "temperature": 0.6, - "top_p": null - }, - "default_stops": [], + "author": "qwen", + "context_length": 128000, + "created_at": "2025-04-30T16:38:24.032465+00:00", + "default_parameters": {}, + "default_stops": ["<|im_start|>", "<|im_end|>"], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "Qwen3-4B is a 4 billion parameter dense language model from the Qwen3 series, designed to support both general-purpose and reasoning-intensive tasks. It introduces a dual-mode architecture—thinking and non-thinking—allowing dynamic switching between high-precision logical reasoning and efficient dialogue generation. This makes it well-suited for multi-turn chat, instruction following, and complex agent workflows.", "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-4B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Qwen: Qwen3 4B", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "qwen/qwen3-4b-04-28", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "Qwen3 4B", + "slug": "qwen/qwen3-4b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6", - "model_variant_slug": "z-ai/glm-4.6", + "model_variant_permaslug": "qwen/qwen3-4b-04-28:free", + "model_variant_slug": "qwen/qwen3-4b:free", "moderation_required": false, - "name": "Together | z-ai/glm-4.6", + "name": "Venice | qwen/qwen3-4b-04-28:free", "pricing": { - "completion": "0.0000022", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Together", + "provider_display_name": "Venice", "provider_info": { - "adapterName": "TogetherAdapter", - "baseUrl": "https://api.together.xyz/v1", + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://www.together.ai/privacy", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", "retainsPrompts": false, - "termsOfServiceURL": "https://www.together.ai/terms-of-service", + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, - "displayName": "Together", + "displayName": "Venice", "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.together.ai/&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" }, "ignoredProviderModels": [ - "mixedbread-ai/Mxbai-Rerank-Large-V2", - "BAAI/bge-base-en-v1.5-vllm", - "scb10x/scb10x-typhoon-2-1-gemma3-12b", - "arcee-ai/AFM-4.5B-Preview", - "deepseek-ai/DeepSeek-R1-0528-tput", - "black-forest-labs/FLUX.1-kontext-dev", - "google/gemma-3-27b-it", - "Qwen/Qwen3-32B-FP8", - "openai/whisper-large-v3", - "eddie/Qwen3-32B", - "eddiehou/meta-llama/Llama-3.1-405B", - "yan/deepseek-ai-deepseek-v3", - "moz/Llama-3.3-70B-Instruct-Turbo", - "serverless-qwen-qwen3-32b-fp8", - "qwen-qwen3-32b-fp8-serverless", - "moz-llama-3-3-70b-instruct-turbo", - "moonshotai/Kimi-K2-Instruct-tgl-testing", - "VirtueAIxTogether/VirtueGuard-Text-Lite", - "Virtue-AI/VirtueGuard-Text-Lite", - "black-forest-labs/FLUX.1-krea-dev", - "prosus/qwen-qwen3-32b-fp8-long-context", - "meta-llama/Llama-4-Scout-17B-16E-Instruct-batch", - "deepseek-ai/DeepSeek-R1-DE", - "arize-ai/qwen-2-1.5b-instruct", - "openai/gpt-oss-120b", - "meta-llama/Llama-3-70b-hf", - "Qwen/Qwen2.5-72B-Instruct", - "meta-llama/Meta-Llama-3.1-70B-Instruct-Reference", - "meta-llama/Llama-3.2-1B-Instruct", - "meta-llama/Meta-Llama-3.1-8B-Instruct-Reference", - "meta-llama/Meta-Llama-3-8B-Instruct", - "meta-llama/Llama-3.1-405B-Instruct", - "cartesia/sonic", - "cartesia/sonic-2", - "togethercomputer/MoA-1", - "Salesforce/Llama-Rank-V1", - "black-forest-labs/FLUX.1-schnell", - "lgai/exaone-3-5-32b-instruct", - "lgai/exaone-deep-32b", - "black-forest-labs/FLUX.1-dev", - "marin-community/marin-8b-instruct", - "togethercomputer/Refuel-Llm-V2-Small", - "meta-llama/Llama-3-70b-chat-hf", - "Alibaba-NLP/gte-modernbert-base", - "black-forest-labs/FLUX.1-pro", - "black-forest-labs/FLUX.1.1-pro", - "togethercomputer/MoA-1-Turbo", - "black-forest-labs/FLUX.1-dev-lora", - "meta-llama/Llama-2-70b-hf", - "togethercomputer/m2-bert-80M-32k-retrieval", - "togethercomputer/Refuel-Llm-V2", - "intfloat/multilingual-e5-large-instruct", - "black-forest-labs/FLUX.1-kontext-max", - "black-forest-labs/FLUX.1-schnell-Free", - "black-forest-labs/FLUX.1-kontext-pro", - "BAAI/bge-large-en-v1.5", - "BAAI/bge-base-en-v1.5", - "Qwen/Qwen3-Next-80B-A3B-Instruct", - "ServiceNow-AI/Apriel-1.5-15b-Thinker", - "openai/whisper-large-v3-test", - "codellama/CodeLlama-34b-Instruct-hf", - "meta-llama/Llama-2-13b-chat-hf", - "togethercomputer/m2-bert-80M-8k-retrieval", - "WhereIsAI/UAE-Large-V1", - "meta-llama/Meta-Llama-3.1-405B-Instruct-Lite-Pro", - "togethercomputer/m2-bert-80M-2k-retrieval", - "meta-llama/Meta-Llama-3-70B-Instruct-Lite", - "Meta-Llama/Llama-Guard-7b", - "Gryphe/MythoMax-L2-13b-Lite", - "upstage/SOLAR-10.7B-Instruct-v1.0", - "Qwen/Qwen2.5-14B-Instruct", - "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free", - "deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free", - "zai-org/GLM-4.5-Air-FP8", - "kwaivgI/kling-1.6-standard", - "HiDream-ai/HiDream-I1-Full", - "ByteDance-Seed/Seedream-3.0", - "ByteDance-Seed/Seedream-4.0", - "Lykon/DreamShaper", - "HiDream-ai/HiDream-I1-Dev", - "Qwen/Qwen-Image", - "RunDiffusion/Juggernaut-pro-flux", - "google/imagen-4.0-preview", - "google/imagen-4.0-ultra", - "google/veo-3.0", - "minimax/hailuo-02", - "stabilityai/stable-diffusion-3-medium", - "black-forest-labs/FLUX.1-Canny-pro", - "google/imagen-4.0-fast", - "minimax/video-01-director", - "HiDream-ai/HiDream-I1-Fast", - "Wan-AI/Wan2.2-T2V-A14B", - "ByteDance/Seedance-1.0-pro", - "google/veo-3.0-fast-audio", - "vidu/vidu-q1", - "kwaivgI/kling-2.1-master", - "google/veo-3.0-audio", - "Rundiffusion/Juggernaut-Lightning-Flux", - "Wan-AI/Wan2.2-I2V-A14B", - "google/flash-image-2.5", - "google/veo-2.0", - "openai/sora-2", - "google/veo-3.0-fast", - "ideogram/ideogram-3.0", - "kwaivgI/kling-2.0-master", - "kwaivgI/kling-2.1-standard", - "pixverse/pixverse-v5", - "stabilityai/stable-diffusion-xl-base-1.0", - "openai/sora-2-pro", - "ByteDance/Seedance-1.0-lite", - "kwaivgI/kling-1.6-pro", - "vidu/vidu-2.0", - "kwaivgI/kling-2.1-pro", - "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", - "canopylabs/orpheus-3b-0.1-ft", - "hexgrad/Kokoro-82M", - "eddie/gemma-2b-it", - "mistralai/Voxtral-Mini-3B-2507", - "arcee-ai/coder-large", - "arcee-ai/virtuoso-large", - "arcee-ai/maestro-reasoning", - "deepcogito/cogito-v2-1-671b", - "arcee_ai/arcee-spotlight", - "google/gemini-3-pro-image", - "mercor/cwm", - "black-forest-labs/FLUX.2-flex", - "keith-aditya/kimi-k2-instruct", - "pangram/mistral-small-2501", - "black-forest-labs/FLUX.2-pro", - "black-forest-labs/FLUX.2-dev", - "ServiceNow-AI/Apriel-1.6-15b-Thinker" + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Together", + "name": "Venice", "owners": ["{}"], - "slug": "together", - "statusPageUrl": "https://status.together.ai/" + "slug": "venice", + "statusPageUrl": null }, - "provider_model_id": "zai-org/GLM-4.6", - "provider_name": "Together", + "provider_model_id": "qwen3-4b", + "provider_name": "Venice", "provider_region": null, - "provider_slug": "together", - "quantization": "unknown", + "provider_slug": "venice/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", "top_p", @@ -155798,105 +154341,63 @@ "frequency_penalty", "presence_penalty", "top_k", - "repetition_penalty", - "logit_bias", - "min_p", "tools", - "tool_choice", - "structured_outputs", - "response_format" + "tool_choice" ], "supports_multipart": true, "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { - "chat_template_config": {}, "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" } }, - "group": "Other", + "group": "Qwen3", "has_text_output": true, - "hf_slug": null, + "hf_slug": "Qwen/Qwen3-4B", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": null, + "instruct_type": "qwen3", "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Qwen: Qwen3 4B (free)", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "qwen/qwen3-4b-04-28", "reasoning_config": { "end_token": "", - "start_token": "", - "system_prompt": null + "start_token": "" }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "Qwen3 4B (free)", + "slug": "qwen/qwen3-4b", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null - } - ], - "name": "Together", - "slug": "together" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": false, - "training": false - }, - "displayName": "Venice", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" - }, - "models": [], - "name": "Venice", - "slug": "venice" - }, - { - "dataPolicy": { - "canPublish": false, - "retainsPrompts": true, - "training": false - }, - "datacenters": ["US"], - "displayName": "Weights & Biases", - "headquarters": "US", - "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://wandb.ai/home&size=256" - }, - "models": [ + }, { - "author": "deepseek", - "context_length": 161000, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "author": "qwen", + "context_length": 262000, + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", "endpoint": { - "adapter_name": "WandbAdapter", + "adapter_name": "VeniceAdapter", "can_abort": true, - "context_length": 161000, + "context_length": 262000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://wandb.ai/site/privacy/", - "retainsPrompts": true, - "termsOfServiceURL": "https://site.wandb.ai/terms/", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "disable_free_endpoint_limits": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -155905,106 +154406,134 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "78b3f653-4585-4c35-9a42-e3da2d6597ad", + "has_completions": false, + "id": "a9bbd882-011f-4606-8f60-85f3cb642586", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 8, "limit_rpm_cf": null, - "max_completion_tokens": 161000, + "max_completion_tokens": 262000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-03-24T13:59:15.252028+00:00", + "author": "qwen", + "context_length": 1048576, + "created_at": "2025-07-23T00:29:06+00:00", "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", - "features": {}, - "group": "DeepSeek", + "description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over repositories. The model features 480 billion total parameters, with 35 billion active per forward pass (8 out of 160 experts).\n\nPricing for the Alibaba endpoints varies by context length. Once a request is greater than 128k input tokens, the higher pricing is used.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "model_version_group_id": null, + "name": "Qwen: Qwen3 Coder 480B A35B", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", - "reasoning_config": null, + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Qwen3 Coder 480B A35B", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", - "model_variant_slug": "deepseek/deepseek-chat-v3-0324", + "model_variant_permaslug": "qwen/qwen3-coder-480b-a35b-07-25:free", + "model_variant_slug": "qwen/qwen3-coder:free", "moderation_required": false, - "name": "WandB | deepseek/deepseek-chat-v3-0324", + "name": "Venice | qwen/qwen3-coder-480b-a35b-07-25:free", "pricing": { - "completion": "0.00000275", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000114", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Weights & Biases", + "provider_display_name": "Venice (Beta)", "provider_info": { - "adapterName": "WandbAdapter", - "baseUrl": "https://api.inference.wandb.ai/v1", + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://wandb.ai/site/privacy/", - "retainsPrompts": true, - "termsOfServiceURL": "https://site.wandb.ai/terms/", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, - "displayName": "Weights & Biases", - "editors": [], + "displayName": "Venice (Beta)", + "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://wandb.ai/home&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "WandB", - "owners": ["org_35lC0lqFZXiNrrxF1OW68kxpbX9"], - "slug": "wandb", + "name": "Venice", + "owners": ["{}"], + "slug": "venice/beta", "statusPageUrl": null }, - "provider_model_id": "deepseek-ai/DeepSeek-V3-0324", - "provider_name": "WandB", + "provider_model_id": "qwen3-coder-480b-a35b-instruct", + "provider_name": "Venice", "provider_region": null, - "provider_slug": "wandb/fp8", + "provider_slug": "venice/beta", "quantization": "fp8", "supported_parameters": [ - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "top_k", - "repetition_penalty", + "stop", "frequency_penalty", "presence_penalty", - "stop", - "seed", + "top_k", "tools", "tool_choice" ], @@ -156012,51 +154541,58 @@ "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, - "features": {}, - "group": "DeepSeek", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Qwen3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_slug": "Qwen/Qwen3-Coder-480B-A35B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, - "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", - "name": "DeepSeek: DeepSeek V3 0324", + "model_version_group_id": null, + "name": "Qwen: Qwen3 Coder 480B A35B (free)", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3-0324", - "reasoning_config": null, + "permaslug": "qwen/qwen3-coder-480b-a35b-07-25", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "DeepSeek V3 0324", - "slug": "deepseek/deepseek-chat-v3-0324", + "short_name": "Qwen3 Coder 480B A35B (free)", + "slug": "qwen/qwen3-coder", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "deepseek", - "context_length": 161000, - "created_at": "2025-08-21T12:33:48+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "endpoint": { - "adapter_name": "WandbAdapter", + "adapter_name": "VeniceAdapter", "can_abort": true, - "context_length": 161000, + "context_length": 262144, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://wandb.ai/site/privacy/", - "retainsPrompts": true, - "termsOfServiceURL": "https://site.wandb.ai/terms/", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, "features": { - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "disable_free_endpoint_limits": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -156065,182 +154601,199 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "a50fbc5c-1405-4940-92ad-c455b0396643", + "has_completions": false, + "id": "94248808-ba97-4e3c-be60-1cb0928db51d", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 8, "limit_rpm_cf": null, - "max_completion_tokens": 161000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "deepseek", - "context_length": 131072, - "created_at": "2025-08-21T12:33:48+00:00", + "author": "qwen", + "context_length": 262144, + "created_at": "2025-09-11T17:36:53.6379+00:00", "default_parameters": {}, - "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_stops": [], "default_system": null, - "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual use, while remaining robust on alignment and formatting. Compared with prior Qwen3 instruct variants, it focuses on higher throughput and stability on ultra-long inputs and multi-turn dialogues, making it well-suited for RAG, tool use, and agentic workflows that require consistent final answers rather than visible chain-of-thought.\n\nThe model employs scaling-efficient training and decoding to improve parameter efficiency and inference speed, and has been validated on a broad set of public benchmarks where it reaches or approaches larger Qwen3 systems in several categories while outperforming earlier mid-sized baselines. It is best used as a general assistant, code helper, and long-context task solver in production settings where deterministic, instruction-following outputs are preferred.", "features": { "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "name": "Qwen: Qwen3 Next 80B A3B Instruct", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", + "short_name": "Qwen3 Next 80B A3B Instruct", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", - "model_variant_slug": "deepseek/deepseek-chat-v3.1", + "model_variant_permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509:free", + "model_variant_slug": "qwen/qwen3-next-80b-a3b-instruct:free", "moderation_required": false, - "name": "WandB | deepseek/deepseek-chat-v3.1", + "name": "Venice | qwen/qwen3-next-80b-a3b-instruct-2509:free", "pricing": { - "completion": "0.00000165", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0", - "internal_reasoning": "0", - "prompt": "0.00000055", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Weights & Biases", + "provider_display_name": "Venice (Beta)", "provider_info": { - "adapterName": "WandbAdapter", - "baseUrl": "https://api.inference.wandb.ai/v1", + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://wandb.ai/site/privacy/", - "retainsPrompts": true, - "termsOfServiceURL": "https://site.wandb.ai/terms/", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, - "displayName": "Weights & Biases", - "editors": [], + "displayName": "Venice (Beta)", + "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://wandb.ai/home&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "WandB", - "owners": ["org_35lC0lqFZXiNrrxF1OW68kxpbX9"], - "slug": "wandb", + "name": "Venice", + "owners": ["{}"], + "slug": "venice/beta", "statusPageUrl": null }, - "provider_model_id": "deepseek-ai/DeepSeek-V3.1", - "provider_name": "WandB", + "provider_model_id": "qwen3-next-80b", + "provider_name": "Venice", "provider_region": null, - "provider_slug": "wandb/fp8", - "quantization": "fp8", + "provider_slug": "venice/beta", + "quantization": "fp16", "supported_parameters": [ - "reasoning", - "include_reasoning", - "structured_outputs", - "response_format", "max_tokens", "temperature", "top_p", - "top_k", - "repetition_penalty", + "stop", "frequency_penalty", "presence_penalty", - "stop", - "seed", + "top_k", "tools", - "tool_choice" + "tool_choice", + "structured_outputs", + "response_format" ], "supports_multipart": true, - "supports_reasoning": true, + "supports_reasoning": false, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, "features": { "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null } }, - "group": "DeepSeek", + "group": "Qwen3", "has_text_output": true, - "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_slug": "Qwen/Qwen3-Next-80B-A3B-Instruct", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "deepseek-v3.1", + "instruct_type": null, "model_version_group_id": null, - "name": "DeepSeek: DeepSeek V3.1", + "name": "Qwen: Qwen3 Next 80B A3B Instruct (free)", "output_modalities": ["text"], - "permaslug": "deepseek/deepseek-chat-v3.1", + "permaslug": "qwen/qwen3-next-80b-a3b-instruct-2509", "reasoning_config": { - "end_token": "", - "start_token": "", + "end_token": null, + "start_token": null, "system_prompt": null }, "router": null, - "short_name": "DeepSeek V3.1", - "slug": "deepseek/deepseek-chat-v3.1", + "short_name": "Qwen3 Next 80B A3B Instruct (free)", + "slug": "qwen/qwen3-next-80b-a3b-instruct", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "meta-llama", - "context_length": 128000, - "created_at": "2024-12-06T17:28:57.828422+00:00", + "author": "venice", + "context_length": 32768, + "created_at": "2025-07-09T21:02:46.328189+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an “uncensored” instruct-tuned LLM, preserving user control over alignment, system prompts, and behavior. Intended for advanced and unrestricted use cases, Venice Uncensored emphasizes steerability and transparent behavior, removing default safety and alignment layers typically found in mainstream assistant models.", "endpoint": { - "adapter_name": "WandbAdapter", + "adapter_name": "VeniceAdapter", "can_abort": true, - "context_length": 128000, + "context_length": 32768, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://wandb.ai/site/privacy/", - "retainsPrompts": true, - "termsOfServiceURL": "https://site.wandb.ai/terms/", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, "features": { + "disable_free_endpoint_limits": true, "supported_parameters": { "response_format": true, "structured_outputs": true }, - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -156249,93 +154802,125 @@ } }, "has_chat_completions": true, - "has_completions": true, - "id": "4420efea-38fe-4ec1-a7c6-c6dbcdeea8aa", + "has_completions": false, + "id": "2d9e49f9-2147-4259-9871-4f6b6f181976", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": false, + "is_free": true, "is_hidden": false, "limit_rpd": null, - "limit_rpm": null, + "limit_rpm": 8, "limit_rpm_cf": null, - "max_completion_tokens": 128000, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "meta-llama", - "context_length": 131072, - "created_at": "2024-12-06T17:28:57.828422+00:00", + "author": "venice", + "context_length": 32768, + "created_at": "2025-07-09T21:02:46.328189+00:00", "default_parameters": {}, - "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_stops": [], "default_system": null, - "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", - "features": {}, - "group": "Llama3", + "description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an “uncensored” instruct-tuned LLM, preserving user control over alignment, system prompts, and behavior. Intended for advanced and unrestricted use cases, Venice Uncensored emphasizes steerability and transparent behavior, removing default safety and alignment layers typically found in mainstream assistant models.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Venice: Uncensored", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "venice/uncensored", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Uncensored", + "slug": "cognitivecomputations/dolphin-mistral-24b-venice-edition", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, - "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", - "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_permaslug": "venice/uncensored:free", + "model_variant_slug": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free", "moderation_required": false, - "name": "WandB | meta-llama/llama-3.3-70b-instruct", + "name": "Venice | venice/uncensored:free", "pricing": { - "completion": "0.00000071", + "completion": "0", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000071", - "request": "0", - "web_search": "0" + "prompt": "0" }, - "provider_display_name": "Weights & Biases", + "provider_display_name": "Venice", "provider_info": { - "adapterName": "WandbAdapter", - "baseUrl": "https://api.inference.wandb.ai/v1", + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", "byokEnabled": true, "dataPolicy": { "canPublish": false, - "privacyPolicyURL": "https://wandb.ai/site/privacy/", - "retainsPrompts": true, - "termsOfServiceURL": "https://site.wandb.ai/terms/", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, - "displayName": "Weights & Biases", - "editors": [], + "displayName": "Venice", + "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": true, - "headquarters": "US", + "hasCompletions": false, "icon": { - "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://wandb.ai/home&size=256" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" }, - "ignoredProviderModels": [], + "ignoredProviderModels": [ + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" + ], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "WandB", - "owners": ["org_35lC0lqFZXiNrrxF1OW68kxpbX9"], - "slug": "wandb", + "name": "Venice", + "owners": ["{}"], + "slug": "venice", "statusPageUrl": null }, - "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct", - "provider_name": "WandB", + "provider_model_id": "venice-uncensored", + "provider_name": "Venice", "provider_region": null, - "provider_slug": "wandb/fp16", + "provider_slug": "venice/fp16", "quantization": "fp16", "supported_parameters": [ "structured_outputs", @@ -156343,102 +154928,104 @@ "max_tokens", "temperature", "top_p", - "top_k", - "repetition_penalty", + "stop", "frequency_penalty", "presence_penalty", - "stop", - "seed", - "tools", - "tool_choice" + "top_k" ], "supports_multipart": true, "supports_reasoning": false, - "supports_tool_parameters": true, + "supports_tool_parameters": false, "variable_pricings": [], - "variant": "standard" + "variant": "free" }, - "features": {}, - "group": "Llama3", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Other", "has_text_output": true, - "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_slug": "cognitivecomputations/Dolphin-Mistral-24B-Venice-Edition", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], - "instruct_type": "llama3", - "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", - "name": "Meta: Llama 3.3 70B Instruct", + "instruct_type": null, + "model_version_group_id": null, + "name": "Venice: Uncensored (free)", "output_modalities": ["text"], - "permaslug": "meta-llama/llama-3.3-70b-instruct", - "reasoning_config": null, + "permaslug": "venice/uncensored", + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "Llama 3.3 70B Instruct", - "slug": "meta-llama/llama-3.3-70b-instruct", + "short_name": "Uncensored (free)", + "slug": "cognitivecomputations/dolphin-mistral-24b-venice-edition", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { - "adapter_name": "WandbAdapter", + "adapter_name": "VeniceAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 200000, "data_policy": { "canPublish": false, - "privacyPolicyURL": "https://wandb.ai/site/privacy/", - "retainsPrompts": true, - "termsOfServiceURL": "https://site.wandb.ai/terms/", + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", "training": false }, "features": { - "is_mandatory_reasoning": true, - "supported_parameters": { - "response_format": true, - "structured_outputs": true - }, + "reasoning_return_mechanism": "reasoning-content", "supports_tool_choice": { "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true + "literal_none": false, + "literal_required": false, + "type_function": false } }, "has_chat_completions": true, - "has_completions": true, - "id": "3ca6292f-cef7-48a2-bf52-988e934cab57", + "has_completions": false, + "id": "f99f5dca-50a5-4169-a758-60b2cd09d599", "is_byok": false, "is_deranked": false, "is_disabled": false, "is_free": false, "is_hidden": false, "limit_rpd": null, - "limit_rpm": 250, + "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 131072, + "max_completion_tokens": null, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "openai", - "context_length": 131072, - "created_at": "2025-08-05T17:17:11+00:00", + "author": "z-ai", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": null, - "top_p": null + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -156447,41 +155034,759 @@ "system_prompt": null } }, - "group": "GPT", + "group": "Other", "has_text_output": true, - "hf_slug": "openai/gpt-oss-120b", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "OpenAI: gpt-oss-120b", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "openai/gpt-oss-120b", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "gpt-oss-120b", - "slug": "openai/gpt-oss-120b", - "updated_at": "2026-01-08T19:23:52.555156+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, - "model_variant_permaslug": "openai/gpt-oss-120b", - "model_variant_slug": "openai/gpt-oss-120b", + "model_variant_permaslug": "z-ai/glm-4.7-20251222", + "model_variant_slug": "z-ai/glm-4.7", "moderation_required": false, - "name": "WandB | openai/gpt-oss-120b", + "name": "Venice | z-ai/glm-4.7-20251222", "pricing": { - "completion": "0.0000006", + "completion": "0.00000265", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000015", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000011", + "prompt": "0.00000055" + }, + "provider_display_name": "Venice", + "provider_info": { + "adapterName": "VeniceAdapter", + "baseUrl": "https://api.venice.ai/api/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://venice.ai/legal/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://venice.ai/legal/tos", + "training": false + }, + "displayName": "Venice", + "editors": ["{}"], + "hasChatCompletions": true, + "hasCompletions": false, + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://venice.ai/&size=256" + }, + "ignoredProviderModels": [ + "llama-3.2-3b", + "deepseek-coder-v2-lite", + "dolphin-2.9.2-qwen2-72b", + "mistral-32-24b", + "zai-org-glm-4.6", + "qwen3-235b-a22b-thinking-2507", + "qwen3-235b-a22b-instruct-2507", + "google-gemma-3-27b-it", + "openai-gpt-oss-120b", + "deepseek-ai-DeepSeek-R1", + "grok-41-fast", + "gemini-3-pro-preview", + "claude-opus-45", + "kimi-k2-thinking", + "deepseek-v3.2", + "openai-gpt-52", + "gemini-3-flash-preview", + "grok-code-fast-1", + "claude-sonnet-45", + "openai-gpt-52-codex", + "venice-uncensored", + "qwen3-4b", + "mistral-31-24b", + "qwen3-next-80b", + "qwen3-coder-480b-a35b-instruct", + "hermes-3-llama-3.1-405b", + "llama-3.3-70b" + ], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Venice", + "owners": ["{}"], + "slug": "venice", + "statusPageUrl": null + }, + "provider_model_id": "zai-org-glm-4.7", + "provider_name": "Venice", + "provider_region": null, + "provider_slug": "venice/fp4", + "quantization": "fp4", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "top_k", + "tools", + "tool_choice", + "structured_outputs", + "response_format" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "zai-org/GLM-4.7", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Z.AI: GLM 4.7", + "output_modalities": ["text"], + "permaslug": "z-ai/glm-4.7-20251222", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", + "warning_message": null + } + ], + "name": "Venice", + "slug": "venice" + }, + { + "dataPolicy": { + "canPublish": false, + "retainsPrompts": true, + "training": false + }, + "datacenters": ["US"], + "displayName": "Weights & Biases", + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://wandb.ai/home&size=256" + }, + "models": [ + { + "author": "deepseek", + "context_length": 161000, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, + "default_stops": [], + "default_system": null, + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "endpoint": { + "adapter_name": "WandbAdapter", + "can_abort": true, + "context_length": 161000, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://wandb.ai/site/privacy/", + "retainsPrompts": true, + "termsOfServiceURL": "https://site.wandb.ai/terms/", + "training": false + }, + "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "78b3f653-4585-4c35-9a42-e3da2d6597ad", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 161000, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-03-24T13:59:15.252028+00:00", + "default_parameters": {}, + "default_stops": [], + "default_system": null, + "description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team.\n\nIt succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well on a variety of tasks.", + "features": {}, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, + "router": null, + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "deepseek/deepseek-chat-v3-0324", + "model_variant_slug": "deepseek/deepseek-chat-v3-0324", + "moderation_required": false, + "name": "WandB | deepseek/deepseek-chat-v3-0324", + "pricing": { + "completion": "0.00000275", + "discount": 0, + "input_cache_read": "0.00000114", + "prompt": "0.00000114" + }, + "provider_display_name": "Weights & Biases", + "provider_info": { + "adapterName": "WandbAdapter", + "baseUrl": "https://api.inference.wandb.ai/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://wandb.ai/site/privacy/", + "retainsPrompts": true, + "termsOfServiceURL": "https://site.wandb.ai/terms/", + "training": false + }, + "displayName": "Weights & Biases", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://wandb.ai/home&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "WandB", + "owners": ["org_35lC0lqFZXiNrrxF1OW68kxpbX9"], + "slug": "wandb", + "statusPageUrl": null + }, + "provider_model_id": "deepseek-ai/DeepSeek-V3-0324", + "provider_name": "WandB", + "provider_region": null, + "provider_slug": "wandb/fp8", + "quantization": "fp8", + "supported_parameters": [ + "structured_outputs", + "response_format", + "max_tokens", + "temperature", + "top_p", + "top_k", + "repetition_penalty", + "frequency_penalty", + "presence_penalty", + "stop", + "seed", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": {}, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3-0324", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": "be67b3ba-9d99-440c-ae90-6514d99b93ed", + "name": "DeepSeek: DeepSeek V3 0324", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3-0324", + "reasoning_config": null, + "router": null, + "short_name": "DeepSeek V3 0324", + "slug": "deepseek/deepseek-chat-v3-0324", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + { + "author": "deepseek", + "context_length": 161000, + "created_at": "2025-08-21T12:33:48+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_system": null, + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "endpoint": { + "adapter_name": "WandbAdapter", + "can_abort": true, + "context_length": 161000, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://wandb.ai/site/privacy/", + "retainsPrompts": true, + "termsOfServiceURL": "https://site.wandb.ai/terms/", + "training": false + }, + "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "a50fbc5c-1405-4940-92ad-c455b0396643", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 161000, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "deepseek", + "context_length": 131072, + "created_at": "2025-08-21T12:33:48+00:00", + "default_parameters": {}, + "default_stops": ["<|User|>", "<|end▁of▁sentence|>"], + "default_system": null, + "description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context training process, reaching up to 128K tokens, and uses FP8 microscaling for efficient inference. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)\n\nThe model improves tool use, code generation, and reasoning efficiency, achieving performance comparable to DeepSeek-R1 on difficult benchmarks while responding more quickly. It supports structured tool calling, code agents, and search agents, making it suitable for research, coding, and agentic workflows. \n\nIt succeeds the [DeepSeek V3-0324](/deepseek/deepseek-chat-v3-0324) model and performs well on a variety of tasks.", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "deepseek-v3.1", + "model_version_group_id": null, + "name": "DeepSeek: DeepSeek V3.1", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "deepseek/deepseek-chat-v3.1", + "model_variant_slug": "deepseek/deepseek-chat-v3.1", + "moderation_required": false, + "name": "WandB | deepseek/deepseek-chat-v3.1", + "pricing": { + "completion": "0.00000165", + "discount": 0, + "input_cache_read": "0", + "prompt": "0.00000055" + }, + "provider_display_name": "Weights & Biases", + "provider_info": { + "adapterName": "WandbAdapter", + "baseUrl": "https://api.inference.wandb.ai/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://wandb.ai/site/privacy/", + "retainsPrompts": true, + "termsOfServiceURL": "https://site.wandb.ai/terms/", + "training": false + }, + "displayName": "Weights & Biases", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://wandb.ai/home&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "WandB", + "owners": ["org_35lC0lqFZXiNrrxF1OW68kxpbX9"], + "slug": "wandb", + "statusPageUrl": null + }, + "provider_model_id": "deepseek-ai/DeepSeek-V3.1", + "provider_name": "WandB", + "provider_region": null, + "provider_slug": "wandb/fp8", + "quantization": "fp8", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", + "max_tokens", + "temperature", + "top_p", + "top_k", + "repetition_penalty", + "frequency_penalty", + "presence_penalty", + "stop", + "seed", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "DeepSeek", + "has_text_output": true, + "hf_slug": "deepseek-ai/DeepSeek-V3.1", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "deepseek-v3.1", + "model_version_group_id": null, + "name": "DeepSeek: DeepSeek V3.1", + "output_modalities": ["text"], + "permaslug": "deepseek/deepseek-chat-v3.1", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "DeepSeek V3.1", + "slug": "deepseek/deepseek-chat-v3.1", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + { + "author": "meta-llama", + "context_length": 128000, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_system": null, + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "endpoint": { + "adapter_name": "WandbAdapter", + "can_abort": true, + "context_length": 128000, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://wandb.ai/site/privacy/", + "retainsPrompts": true, + "termsOfServiceURL": "https://site.wandb.ai/terms/", + "training": false + }, + "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_input_audio": false, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "4420efea-38fe-4ec1-a7c6-c6dbcdeea8aa", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 128000, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "meta-llama", + "context_length": 131072, + "created_at": "2024-12-06T17:28:57.828422+00:00", + "default_parameters": {}, + "default_stops": ["<|eot_id|>", "<|end_of_text|>"], + "default_system": null, + "description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases and outperforms many of the available open source and closed chat models on common industry benchmarks.\n\nSupported languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai.\n\n[Model Card](https://github.com/meta-llama/llama-models/blob/main/models/llama3_3/MODEL_CARD.md)", + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, + "router": null, + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "meta-llama/llama-3.3-70b-instruct", + "model_variant_slug": "meta-llama/llama-3.3-70b-instruct", + "moderation_required": false, + "name": "WandB | meta-llama/llama-3.3-70b-instruct", + "pricing": { + "completion": "0.00000071", + "discount": 0, + "input_cache_read": "0.00000071", + "prompt": "0.00000071" + }, + "provider_display_name": "Weights & Biases", + "provider_info": { + "adapterName": "WandbAdapter", + "baseUrl": "https://api.inference.wandb.ai/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://wandb.ai/site/privacy/", + "retainsPrompts": true, + "termsOfServiceURL": "https://site.wandb.ai/terms/", + "training": false + }, + "displayName": "Weights & Biases", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": true, + "headquarters": "US", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://wandb.ai/home&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "WandB", + "owners": ["org_35lC0lqFZXiNrrxF1OW68kxpbX9"], + "slug": "wandb", + "statusPageUrl": null + }, + "provider_model_id": "meta-llama/Llama-3.3-70B-Instruct", + "provider_name": "WandB", + "provider_region": null, + "provider_slug": "wandb/fp16", + "quantization": "fp16", + "supported_parameters": [ + "structured_outputs", + "response_format", + "max_tokens", + "temperature", + "top_p", + "top_k", + "repetition_penalty", + "frequency_penalty", + "presence_penalty", + "stop", + "seed", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": false, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": {}, + "group": "Llama3", + "has_text_output": true, + "hf_slug": "meta-llama/Llama-3.3-70B-Instruct", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": "llama3", + "model_version_group_id": "397604e2-45fa-454e-a85d-9921f5138747", + "name": "Meta: Llama 3.3 70B Instruct", + "output_modalities": ["text"], + "permaslug": "meta-llama/llama-3.3-70b-instruct", + "reasoning_config": null, + "router": null, + "short_name": "Llama 3.3 70B Instruct", + "slug": "meta-llama/llama-3.3-70b-instruct", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + { + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "endpoint": { + "adapter_name": "WandbAdapter", + "can_abort": true, + "context_length": 131072, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://wandb.ai/site/privacy/", + "retainsPrompts": true, + "termsOfServiceURL": "https://site.wandb.ai/terms/", + "training": false + }, + "features": { + "is_mandatory_reasoning": true, + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": true, + "id": "3ca6292f-cef7-48a2-bf52-988e934cab57", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": 250, + "limit_rpm_cf": null, + "max_completion_tokens": 131072, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "openai", + "context_length": 131072, + "created_at": "2025-08-05T17:17:11+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "GPT", + "has_text_output": true, + "hf_slug": "openai/gpt-oss-120b", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "OpenAI: gpt-oss-120b", + "output_modalities": ["text"], + "permaslug": "openai/gpt-oss-120b", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "gpt-oss-120b", + "slug": "openai/gpt-oss-120b", + "updated_at": "2026-01-08T19:23:52.555156+00:00", + "warning_message": null + }, + "model_variant_permaslug": "openai/gpt-oss-120b", + "model_variant_slug": "openai/gpt-oss-120b", + "moderation_required": false, + "name": "WandB | openai/gpt-oss-120b", + "pricing": { + "completion": "0.0000006", + "discount": 0, + "input_cache_read": "0.00000015", + "prompt": "0.00000015" }, "provider_display_name": "Weights & Biases", "provider_info": { @@ -156670,12 +155975,8 @@ "pricing": { "completion": "0.0000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000005", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000005", + "prompt": "0.00000005" }, "provider_display_name": "Weights & Biases", "provider_info": { @@ -156855,12 +156156,8 @@ "pricing": { "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.0000001", + "prompt": "0.0000001" }, "provider_display_name": "Weights & Biases", "provider_info": { @@ -157035,12 +156332,8 @@ "pricing": { "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.000001", - "request": "0", - "web_search": "0" + "input_cache_read": "0.000001", + "prompt": "0.000001" }, "provider_display_name": "Weights & Biases", "provider_info": { @@ -157150,6 +156443,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supported_parameters": { "response_format": true, "structured_outputs": true @@ -157224,12 +156518,8 @@ "pricing": { "completion": "0.000002", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.00000055", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000055", + "prompt": "0.00000055" }, "provider_display_name": "Weights & Biases", "provider_info": { @@ -157421,13 +156711,9 @@ "pricing": { "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000075", - "internal_reasoning": "0", "prompt": "0.000003", - "request": "0", - "web_search": "0" + "web_search": "0.005" }, "provider_display_name": "xAI", "provider_info": { @@ -157589,13 +156875,9 @@ "pricing": { "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000075", - "internal_reasoning": "0", "prompt": "0.000003", - "request": "0", - "web_search": "0" + "web_search": "0.005" }, "provider_display_name": "xAI", "provider_info": { @@ -157772,13 +157054,9 @@ "pricing": { "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.000000075", - "internal_reasoning": "0", "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "web_search": "0.005" }, "provider_display_name": "xAI", "provider_info": { @@ -157964,13 +157242,9 @@ "pricing": { "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.000000075", - "internal_reasoning": "0", "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "web_search": "0.005" }, "provider_display_name": "xAI", "provider_info": { @@ -158148,13 +157422,9 @@ "pricing": { "completion": "0.000015", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000075", - "internal_reasoning": "0", "prompt": "0.000003", - "request": "0", - "web_search": "0" + "web_search": "0.005" }, "provider_display_name": "xAI", "provider_info": { @@ -158351,13 +157621,9 @@ "pricing": { "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000005", - "internal_reasoning": "0", "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "web_search": "0.005" }, "provider_display_name": "xAI", "provider_info": { @@ -158550,13 +157816,9 @@ "pricing": { "completion": "0.0000005", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000005", - "internal_reasoning": "0", "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "web_search": "0.005" }, "provider_display_name": "xAI", "provider_info": { @@ -158652,48 +157914,70 @@ "warning_message": null }, { - "author": "Other", + "author": "x-ai", "context_length": 256000, - "created_at": "2025-08-26T20:08:47.000Z", - "default_parameters": null, + "created_at": "2025-08-26T20:08:47+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "A reasoning model that is blazing fast and excels at agentic coding, accessible for free in Kilo Code for a limited time. (Note: prompts and completions are logged by xAI and used to improve the model.)", + "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality work flows.", "endpoint": { - "adapter_name": "other", + "adapter_name": "XAIResponsesAdapter", "can_abort": true, "context_length": 256000, "data_policy": { "canPublish": false, + "privacyPolicyURL": "https://x.ai/legal/privacy-policy", + "requiresUserIDs": true, "retainsPrompts": true, - "training": true + "termsOfServiceURL": "https://x.ai/legal/terms-of-service-enterprise", + "training": false + }, + "features": { + "supported_parameters": { + "response_format": true, + "structured_outputs": true + }, + "supports_implicit_caching": true, + "supports_input_audio": false, + "supports_multipart": true, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } }, - "features": null, "has_chat_completions": true, - "has_completions": false, - "id": "x-ai/grok-code-fast-1", + "has_completions": true, + "id": "b352d924-78f4-4db6-963f-70f440d07776", "is_byok": false, "is_deranked": false, "is_disabled": false, - "is_free": true, + "is_free": false, "is_hidden": false, "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, "max_completion_tokens": 10000, - "max_prompt_images": null, - "max_prompt_tokens": 256000, + "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { - "author": "Other", + "author": "x-ai", "context_length": 256000, - "created_at": "2025-08-26T20:08:47.000Z", - "default_parameters": null, + "created_at": "2025-08-26T20:08:47+00:00", + "default_parameters": {}, "default_stops": [], "default_system": null, - "description": "A reasoning model that is blazing fast and excels at agentic coding, accessible for free in Kilo Code for a limited time. (Note: prompts and completions are logged by xAI and used to improve the model.)", - "features": null, - "group": "other", + "description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality work flows.", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Grok", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, @@ -158701,79 +157985,95 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "xAI: Grok Code Fast 1 (free)", + "name": "xAI: Grok Code Fast 1", "output_modalities": ["text"], "permaslug": "x-ai/grok-code-fast-1", - "reasoning_config": null, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "xAI: Grok Code Fast 1 (free)", + "short_name": "Grok Code Fast 1", "slug": "x-ai/grok-code-fast-1", - "updated_at": "2026-01-13T12:11:27.631Z", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, "model_variant_permaslug": "x-ai/grok-code-fast-1", "model_variant_slug": "x-ai/grok-code-fast-1", "moderation_required": false, - "name": "xAI: Grok Code Fast 1 (free)", + "name": "xAI | x-ai/grok-code-fast-1", "pricing": { - "completion": "0.0000000", + "completion": "0.0000015", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000000", - "internal_reasoning": "0", - "prompt": "0.0000000", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000002", + "prompt": "0.0000002", + "web_search": "0.005" }, - "provider_display_name": "Other", + "provider_display_name": "xAI", "provider_info": { - "adapterName": "other", - "baseUrl": "https://kilo.ai", - "byokEnabled": false, + "adapterName": "XAIResponsesAdapter", + "baseUrl": "https://api.x.ai/v1", + "byokEnabled": true, "dataPolicy": { "canPublish": false, + "privacyPolicyURL": "https://x.ai/legal/privacy-policy", + "requiresUserIDs": true, "retainsPrompts": true, - "training": true + "termsOfServiceURL": "https://x.ai/legal/terms-of-service-enterprise", + "training": false }, - "displayName": "Other", - "editors": [], + "displayName": "xAI", + "editors": ["{}"], "hasChatCompletions": true, - "hasCompletions": false, - "headquarters": "Unknown", + "hasCompletions": true, + "headquarters": "US", "icon": { - "className": "rounded-sm", - "url": "https://via.placeholder.com/32x32/000000/FFFFFF?text=S" + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://x.ai/&size=256" }, "ignoredProviderModels": [], "isAbortable": true, "isMultipartSupported": true, "moderationRequired": false, - "name": "Other", - "owners": [], - "slug": "other", - "statusPageUrl": null + "name": "xAI", + "owners": ["{}"], + "slug": "xai", + "statusPageUrl": "https://status.x.ai/" }, - "provider_model_id": "x-ai/grok-code-fast-1", - "provider_name": "Other", + "provider_model_id": "grok-code-fast-1", + "provider_name": "xAI", "provider_region": null, - "provider_slug": "other", - "quantization": null, + "provider_slug": "xai", + "quantization": "unknown", "supported_parameters": [ + "reasoning", + "include_reasoning", + "structured_outputs", + "response_format", "max_tokens", "temperature", + "top_p", + "seed", + "logprobs", + "top_logprobs", + "stop", "tools", - "reasoning", - "include_reasoning" + "tool_choice" ], "supports_multipart": true, - "supports_reasoning": false, + "supports_reasoning": true, "supports_tool_parameters": true, "variable_pricings": [], - "variant": "default" + "variant": "standard" }, - "features": null, - "group": "other", + "features": { + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + } + }, + "group": "Grok", "has_text_output": true, "hf_slug": null, "hf_updated_at": null, @@ -158781,14 +158081,18 @@ "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "xAI: Grok Code Fast 1 (free)", + "name": "xAI: Grok Code Fast 1", "output_modalities": ["text"], "permaslug": "x-ai/grok-code-fast-1", - "reasoning_config": null, + "reasoning_config": { + "end_token": null, + "start_token": null, + "system_prompt": null + }, "router": null, - "short_name": "xAI: Grok Code Fast 1 (free)", + "short_name": "Grok Code Fast 1", "slug": "x-ai/grok-code-fast-1", - "updated_at": "2026-01-13T12:11:27.631Z", + "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null } ], @@ -158807,7 +158111,195 @@ "icon": { "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.mi.com/&size=256" }, - "models": [], + "models": [ + { + "author": "xiaomi", + "context_length": 262144, + "created_at": "2025-12-14T16:55:08+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": 0.95 + }, + "default_stops": [], + "default_system": "You are MiMo, an AI assistant developed by Xiaomi.\n\nYour knowledge cutoff date is December 2024.", + "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config).", + "endpoint": { + "adapter_name": "XiaomiAdapter", + "can_abort": false, + "context_length": 262144, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://platform.xiaomimimo.com/#/docs/terms/privacy-policy", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://platform.xiaomimimo.com/#/docs/terms/user-agreement", + "training": false + }, + "features": { + "reasoning_return_mechanism": "reasoning-content", + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "3dde4379-f449-4ed8-8a33-42f58b33de21", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 65536, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "xiaomi", + "context_length": 262144, + "created_at": "2025-12-14T16:55:08+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": null, + "top_p": 0.95 + }, + "default_stops": [], + "default_system": "You are MiMo, an AI assistant developed by Xiaomi.\n\nYour knowledge cutoff date is December 2024.", + "description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a hybrid-thinking toggle and a 256K context window, and excels at reasoning, coding, and agent scenarios. On SWE-bench Verified and SWE-bench Multilingual, MiMo-V2-Flash ranks as the top #1 open-source model globally, delivering performance comparable to Claude Sonnet 4.5 while costing only about 3.5% as much.\n\nUsers can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config).", + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "XiaomiMiMo/MiMo-V2-Flash", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Xiaomi: MiMo-V2-Flash", + "output_modalities": ["text"], + "permaslug": "xiaomi/mimo-v2-flash-20251210", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "MiMo-V2-Flash", + "slug": "xiaomi/mimo-v2-flash", + "updated_at": "2026-01-21T16:26:04.702272+00:00", + "warning_message": null + }, + "model_variant_permaslug": "xiaomi/mimo-v2-flash-20251210", + "model_variant_slug": "xiaomi/mimo-v2-flash", + "moderation_required": false, + "name": "Xiaomi | xiaomi/mimo-v2-flash-20251210", + "pricing": { + "completion": "0.0000003", + "discount": 0, + "input_cache_read": "0.00000001", + "prompt": "0.0000001" + }, + "provider_display_name": "Xiaomi", + "provider_info": { + "adapterName": "XiaomiAdapter", + "baseUrl": "https://api.xiaomimimo.com/v1", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://platform.xiaomimimo.com/#/docs/terms/privacy-policy", + "requiresUserIDs": true, + "retainsPrompts": true, + "termsOfServiceURL": "https://platform.xiaomimimo.com/#/docs/terms/user-agreement", + "training": false + }, + "displayName": "Xiaomi", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": false, + "headquarters": "CN", + "icon": { + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://www.mi.com/&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": false, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Xiaomi", + "owners": [ + "user_353qok8CmkPxVcEtDuGZzQOEu4u", + "user_35F3bTvDBxn8U8NlFRLtIufJNlD", + "user_36vqlXmXNo4fc4k6musIPTdL7Sj" + ], + "slug": "xiaomi", + "statusPageUrl": null + }, + "provider_model_id": "xiaomi/mimo-v2-flash", + "provider_name": "Xiaomi", + "provider_region": null, + "provider_slug": "xiaomi/fp8", + "quantization": "fp8", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "stop", + "frequency_penalty", + "presence_penalty", + "response_format", + "tool_choice", + "tools" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "chat_template_config": {}, + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "XiaomiMiMo/MiMo-V2-Flash", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Xiaomi: MiMo-V2-Flash", + "output_modalities": ["text"], + "permaslug": "xiaomi/mimo-v2-flash-20251210", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "MiMo-V2-Flash", + "slug": "xiaomi/mimo-v2-flash", + "updated_at": "2026-01-21T16:26:04.702272+00:00", + "warning_message": null + } + ], "name": "Xiaomi", "slug": "xiaomi" }, @@ -158922,12 +158414,7 @@ "pricing": { "completion": "0.0000001", "discount": 0, - "image": "0", - "image_output": "0", - "internal_reasoning": "0", - "prompt": "0.0000001", - "request": "0", - "web_search": "0" + "prompt": "0.0000001" }, "provider_display_name": "Z.ai", "provider_info": { @@ -159024,6 +158511,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supported_parameters": { "response_format": true, "structured_outputs": false @@ -159098,13 +158586,8 @@ "pricing": { "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000011", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Z.ai", "provider_info": { @@ -159211,6 +158694,7 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supported_parameters": {}, "supports_tool_choice": { "literal_auto": true, @@ -159281,13 +158765,8 @@ "pricing": { "completion": "0.0000011", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000003", - "internal_reasoning": "0", - "prompt": "0.0000002", - "request": "0", - "web_search": "0" + "prompt": "0.0000002" }, "provider_display_name": "Z.ai", "provider_info": { @@ -159348,42 +158827,222 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5-Air", + "hf_slug": "zai-org/GLM-4.5-Air", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Z.AI: GLM 4.5 Air", + "output_modalities": ["text"], + "permaslug": "z-ai/glm-4.5-air", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "GLM 4.5 Air", + "slug": "z-ai/glm-4.5-air", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + { + "author": "z-ai", + "context_length": 65536, + "created_at": "2025-08-11T14:24:48.340676+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "endpoint": { + "adapter_name": "ZAIAdapter", + "can_abort": true, + "context_length": 65536, + "data_policy": { + "canPublish": false, + "privacyPolicyURL": "https://chat.z.ai/legal-agreement/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://chat.z.ai/legal-agreement/terms-of-service", + "training": false + }, + "features": { + "reasoning_return_mechanism": "reasoning-content", + "supported_parameters": {}, + "supports_implicit_caching": true, + "supports_input_audio": false, + "supports_multipart": true, + "supports_tool_choice": { + "literal_auto": true, + "literal_none": true, + "literal_required": true, + "type_function": true + } + }, + "has_chat_completions": true, + "has_completions": false, + "id": "d1b1044b-35cd-48df-8a1e-dbf567d8776a", + "is_byok": false, + "is_deranked": false, + "is_disabled": false, + "is_free": false, + "is_hidden": false, + "limit_rpd": null, + "limit_rpm": null, + "limit_rpm_cf": null, + "max_completion_tokens": 16384, + "max_prompt_tokens": null, + "max_tokens_per_image": null, + "model": { + "author": "z-ai", + "context_length": 65536, + "created_at": "2025-08-11T14:24:48.340676+00:00", + "default_parameters": { + "frequency_penalty": null, + "temperature": 0.75, + "top_p": null + }, + "default_stops": [], + "default_system": null, + "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "zai-org/GLM-4.5V", + "hf_updated_at": null, + "hidden": false, + "input_modalities": ["text", "image"], + "instruct_type": null, + "model_version_group_id": null, + "name": "Z.AI: GLM 4.5V", + "output_modalities": ["text"], + "permaslug": "z-ai/glm-4.5v", + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + }, + "router": null, + "short_name": "GLM 4.5V", + "slug": "z-ai/glm-4.5v", + "updated_at": "2025-11-10T16:00:38.246665+00:00", + "warning_message": null + }, + "model_variant_permaslug": "z-ai/glm-4.5v", + "model_variant_slug": "z-ai/glm-4.5v", + "moderation_required": false, + "name": "Z.AI | z-ai/glm-4.5v", + "pricing": { + "completion": "0.0000018", + "discount": 0, + "input_cache_read": "0.00000011", + "prompt": "0.0000006" + }, + "provider_display_name": "Z.ai", + "provider_info": { + "adapterName": "ZAIAdapter", + "baseUrl": "https://api.z.ai/api/paas/v4", + "byokEnabled": true, + "dataPolicy": { + "canPublish": false, + "privacyPolicyURL": "https://chat.z.ai/legal-agreement/privacy-policy", + "retainsPrompts": false, + "termsOfServiceURL": "https://chat.z.ai/legal-agreement/terms-of-service", + "training": false + }, + "displayName": "Z.ai", + "editors": [], + "hasChatCompletions": true, + "hasCompletions": false, + "headquarters": "SG", + "icon": { + "className": "invert-0 dark:invert", + "url": "https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://z.ai/model-api&size=256" + }, + "ignoredProviderModels": [], + "isAbortable": true, + "isMultipartSupported": true, + "moderationRequired": false, + "name": "Z.AI", + "owners": [], + "slug": "z-ai", + "statusPageUrl": null + }, + "provider_model_id": "glm-4.5v", + "provider_name": "Z.AI", + "provider_region": null, + "provider_slug": "z-ai/fp8", + "quantization": "fp8", + "supported_parameters": [ + "reasoning", + "include_reasoning", + "max_tokens", + "temperature", + "top_p", + "tools", + "tool_choice" + ], + "supports_multipart": true, + "supports_reasoning": true, + "supports_tool_parameters": true, + "variable_pricings": [], + "variant": "standard" + }, + "features": { + "reasoning_config": { + "end_token": "", + "start_token": "", + "system_prompt": null + } + }, + "group": "Other", + "has_text_output": true, + "hf_slug": "zai-org/GLM-4.5V", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["text", "image"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5 Air", + "name": "Z.AI: GLM 4.5V", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5-air", + "permaslug": "z-ai/glm-4.5v", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.5 Air", - "slug": "z-ai/glm-4.5-air", + "short_name": "GLM 4.5V", + "slug": "z-ai/glm-4.5v", "updated_at": "2025-11-10T16:00:38.246665+00:00", "warning_message": null }, { "author": "z-ai", - "context_length": 65536, - "created_at": "2025-08-11T14:24:48.340676+00:00", + "context_length": 200000, + "created_at": "2025-09-30T12:32:56.306946+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.75, + "temperature": 0.6, "top_p": null }, "default_stops": [], "default_system": null, - "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "endpoint": { "adapter_name": "ZAIAdapter", "can_abort": true, - "context_length": 65536, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://chat.z.ai/legal-agreement/privacy-policy", @@ -159392,10 +159051,9 @@ "training": false }, "features": { + "reasoning_return_mechanism": "reasoning-content", "supported_parameters": {}, - "supports_implicit_caching": true, "supports_input_audio": false, - "supports_multipart": true, "supports_tool_choice": { "literal_auto": true, "literal_none": true, @@ -159405,7 +159063,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "d1b1044b-35cd-48df-8a1e-dbf567d8776a", + "id": "a87bfa67-bad9-4ce3-8c20-99b61249cfc3", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -159414,22 +159072,23 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 16384, + "max_completion_tokens": 128000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "z-ai", - "context_length": 65536, - "created_at": "2025-08-11T14:24:48.340676+00:00", + "context_length": 200000, + "created_at": "2025-09-30T12:32:56.306946+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.75, + "temperature": 0.6, "top_p": null }, "default_stops": [], "default_system": null, - "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", + "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", @@ -159438,40 +159097,35 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5V", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5V", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5v", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.5V", - "slug": "z-ai/glm-4.5v", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.5v", - "model_variant_slug": "z-ai/glm-4.5v", + "model_variant_permaslug": "z-ai/glm-4.6", + "model_variant_slug": "z-ai/glm-4.6", "moderation_required": false, - "name": "Z.AI | z-ai/glm-4.5v", + "name": "Z.AI | z-ai/glm-4.6", "pricing": { - "completion": "0.0000018", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", "input_cache_read": "0.00000011", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "prompt": "0.0000006" }, "provider_display_name": "Z.ai", "provider_info": { @@ -159503,11 +159157,11 @@ "slug": "z-ai", "statusPageUrl": null }, - "provider_model_id": "glm-4.5v", + "provider_model_id": "glm-4.6", "provider_name": "Z.AI", "provider_region": null, - "provider_slug": "z-ai/fp8", - "quantization": "fp8", + "provider_slug": "z-ai", + "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", @@ -159524,6 +159178,7 @@ "variant": "standard" }, "features": { + "chat_template_config": {}, "reasoning_config": { "end_token": "", "start_token": "", @@ -159532,42 +159187,42 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.5V", + "hf_slug": null, "hf_updated_at": null, "hidden": false, - "input_modalities": ["text", "image"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.5V", + "name": "Z.AI: GLM 4.6", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.5v", + "permaslug": "z-ai/glm-4.6", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.5V", - "slug": "z-ai/glm-4.5v", - "updated_at": "2025-11-10T16:00:38.246665+00:00", + "short_name": "GLM 4.6", + "slug": "z-ai/glm-4.6", + "updated_at": "2025-11-10T23:35:06.53534+00:00", "warning_message": null }, { "author": "z-ai", - "context_length": 200000, - "created_at": "2025-09-30T12:32:56.306946+00:00", + "context_length": 131072, + "created_at": "2025-12-08T15:24:22.464154+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, - "top_p": null + "temperature": 0.8, + "top_p": 0.6 }, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", "endpoint": { "adapter_name": "ZAIAdapter", "can_abort": true, - "context_length": 200000, + "context_length": 131072, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://chat.z.ai/legal-agreement/privacy-policy", @@ -159576,18 +159231,21 @@ "training": false }, "features": { - "supported_parameters": {}, + "reasoning_return_mechanism": "reasoning-content", + "supports_base64_video_input": true, + "supports_implicit_caching": true, "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, - "literal_none": true, - "literal_required": true, - "type_function": true - } + "literal_none": false, + "literal_required": false, + "type_function": false + }, + "supports_video_urls": true }, "has_chat_completions": true, "has_completions": false, - "id": "a87bfa67-bad9-4ce3-8c20-99b61249cfc3", + "id": "edd46a04-7eac-4d37-bfb6-207ef7905f90", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -159596,21 +159254,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 128000, + "max_completion_tokens": 24000, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "z-ai", - "context_length": 200000, - "created_at": "2025-09-30T12:32:56.306946+00:00", + "context_length": 131072, + "created_at": "2025-12-08T15:24:22.464154+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.6, - "top_p": null + "temperature": 0.8, + "top_p": 0.6 }, "default_stops": [], "default_system": null, - "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", + "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -159621,40 +159279,35 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "zai-org/GLM-4.6V", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Z.AI: GLM 4.6V", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "z-ai/glm-4.6-20251208", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "GLM 4.6V", + "slug": "z-ai/glm-4.6v", + "updated_at": "2025-12-08T15:45:24.970322+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6", - "model_variant_slug": "z-ai/glm-4.6", + "model_variant_permaslug": "z-ai/glm-4.6-20251208", + "model_variant_slug": "z-ai/glm-4.6v", "moderation_required": false, - "name": "Z.AI | z-ai/glm-4.6", + "name": "Z.AI | z-ai/glm-4.6-20251208", "pricing": { - "completion": "0.0000022", + "completion": "0.0000009", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000011", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000005", + "prompt": "0.0000003" }, "provider_display_name": "Z.ai", "provider_info": { @@ -159686,19 +159339,19 @@ "slug": "z-ai", "statusPageUrl": null }, - "provider_model_id": "glm-4.6", + "provider_model_id": "glm-4.6v", "provider_name": "Z.AI", "provider_region": null, - "provider_slug": "z-ai", - "quantization": "unknown", + "provider_slug": "z-ai/fp8", + "quantization": "fp8", "supported_parameters": [ "reasoning", "include_reasoning", "max_tokens", "temperature", "top_p", - "tools", - "tool_choice" + "tool_choice", + "tools" ], "supports_multipart": true, "supports_reasoning": true, @@ -159716,42 +159369,42 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": null, + "hf_slug": "zai-org/GLM-4.6V", "hf_updated_at": null, "hidden": false, - "input_modalities": ["text"], + "input_modalities": ["image", "text", "video"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6", + "name": "Z.AI: GLM 4.6V", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6", + "permaslug": "z-ai/glm-4.6-20251208", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.6", - "slug": "z-ai/glm-4.6", - "updated_at": "2025-11-10T23:35:06.53534+00:00", + "short_name": "GLM 4.6V", + "slug": "z-ai/glm-4.6v", + "updated_at": "2025-12-08T15:45:24.970322+00:00", "warning_message": null }, { "author": "z-ai", - "context_length": 131072, - "created_at": "2025-12-08T15:24:22.464154+00:00", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.6 + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "endpoint": { "adapter_name": "ZAIAdapter", "can_abort": true, - "context_length": 131072, + "context_length": 200000, "data_policy": { "canPublish": false, "privacyPolicyURL": "https://chat.z.ai/legal-agreement/privacy-policy", @@ -159760,20 +159413,18 @@ "training": false }, "features": { - "supports_base64_video_input": true, - "supports_implicit_caching": true, + "reasoning_return_mechanism": "reasoning-content", "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": false, "literal_required": false, "type_function": false - }, - "supports_video_urls": true + } }, "has_chat_completions": true, "has_completions": false, - "id": "edd46a04-7eac-4d37-bfb6-207ef7905f90", + "id": "be4acbf5-9fcf-4332-a01d-76dfeb6d7b99", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -159782,21 +159433,21 @@ "limit_rpd": null, "limit_rpm": null, "limit_rpm_cf": null, - "max_completion_tokens": 24000, + "max_completion_tokens": 131072, "max_prompt_tokens": null, "max_tokens_per_image": null, "model": { "author": "z-ai", - "context_length": 131072, - "created_at": "2025-12-08T15:24:22.464154+00:00", + "context_length": 200000, + "created_at": "2025-12-22T04:33:34.884504+00:00", "default_parameters": { "frequency_penalty": null, - "temperature": 0.8, - "top_p": 0.6 + "temperature": 1, + "top_p": 0.95 }, "default_stops": [], "default_system": null, - "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", + "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "features": { "chat_template_config": {}, "reasoning_config": { @@ -159807,40 +159458,35 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.6V", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6V", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6-20251208", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.6V", - "slug": "z-ai/glm-4.6v", - "updated_at": "2025-12-08T15:45:24.970322+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.6-20251208", - "model_variant_slug": "z-ai/glm-4.6v", + "model_variant_permaslug": "z-ai/glm-4.7-20251222", + "model_variant_slug": "z-ai/glm-4.7", "moderation_required": false, - "name": "Z.AI | z-ai/glm-4.6-20251208", + "name": "Z.AI | z-ai/glm-4.7-20251222", "pricing": { - "completion": "0.0000009", + "completion": "0.0000022", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000005", - "internal_reasoning": "0", - "prompt": "0.0000003", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000011", + "prompt": "0.0000006" }, "provider_display_name": "Z.ai", "provider_info": { @@ -159872,19 +159518,20 @@ "slug": "z-ai", "statusPageUrl": null }, - "provider_model_id": "glm-4.6v", + "provider_model_id": "glm-4.7", "provider_name": "Z.AI", "provider_region": null, - "provider_slug": "z-ai/fp8", - "quantization": "fp8", + "provider_slug": "z-ai", + "quantization": "unknown", "supported_parameters": [ "reasoning", "include_reasoning", "max_tokens", "temperature", "top_p", + "tools", "tool_choice", - "tools" + "response_format" ], "supports_multipart": true, "supports_reasoning": true, @@ -159902,30 +159549,30 @@ }, "group": "Other", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.6V", + "hf_slug": "zai-org/GLM-4.7", "hf_updated_at": null, "hidden": false, - "input_modalities": ["image", "text", "video"], + "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.6V", + "name": "Z.AI: GLM 4.7", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.6-20251208", + "permaslug": "z-ai/glm-4.7-20251222", "reasoning_config": { "end_token": "", "start_token": "", "system_prompt": null }, "router": null, - "short_name": "GLM 4.6V", - "slug": "z-ai/glm-4.6v", - "updated_at": "2025-12-08T15:45:24.970322+00:00", + "short_name": "GLM 4.7", + "slug": "z-ai/glm-4.7", + "updated_at": "2026-01-07T19:34:06.523149+00:00", "warning_message": null }, { "author": "z-ai", "context_length": 200000, - "created_at": "2025-12-22T04:33:34.884504+00:00", + "created_at": "2026-01-19T14:45:13.352372+00:00", "default_parameters": { "frequency_penalty": null, "temperature": 1, @@ -159933,7 +159580,7 @@ }, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", "endpoint": { "adapter_name": "ZAIAdapter", "can_abort": true, @@ -159947,7 +159594,6 @@ }, "features": { "reasoning_return_mechanism": "reasoning-content", - "supports_input_audio": false, "supports_tool_choice": { "literal_auto": true, "literal_none": false, @@ -159957,7 +159603,7 @@ }, "has_chat_completions": true, "has_completions": false, - "id": "be4acbf5-9fcf-4332-a01d-76dfeb6d7b99", + "id": "18a62ad3-010d-49c1-87bd-c2af7056db0b", "is_byok": false, "is_deranked": false, "is_disabled": false, @@ -159972,7 +159618,7 @@ "model": { "author": "z-ai", "context_length": 200000, - "created_at": "2025-12-22T04:33:34.884504+00:00", + "created_at": "2026-01-19T14:45:13.352372+00:00", "default_parameters": { "frequency_penalty": null, "temperature": 1, @@ -159980,51 +159626,38 @@ }, "default_stops": [], "default_system": null, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", "features": { "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } + "reasoning_config": {} }, "group": "Other", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": "zai-org/GLM-4.7-Flash", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "Z.AI: GLM 4.7 Flash", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "z-ai/glm-4.7-flash-20260119", + "reasoning_config": {}, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "GLM 4.7 Flash", + "slug": "z-ai/glm-4.7-flash", + "updated_at": "2026-01-19T15:38:17.116015+00:00", "warning_message": null }, - "model_variant_permaslug": "z-ai/glm-4.7-20251222", - "model_variant_slug": "z-ai/glm-4.7", + "model_variant_permaslug": "z-ai/glm-4.7-flash-20260119", + "model_variant_slug": "z-ai/glm-4.7-flash", "moderation_required": false, - "name": "Z.AI | z-ai/glm-4.7-20251222", + "name": "Z.AI | z-ai/glm-4.7-flash-20260119", "pricing": { - "completion": "0.0000022", + "completion": "0.0000004", "discount": 0, - "image": "0", - "image_output": "0", - "input_cache_read": "0.00000011", - "internal_reasoning": "0", - "prompt": "0.0000006", - "request": "0", - "web_search": "0" + "input_cache_read": "0.00000001", + "prompt": "0.00000007" }, "provider_display_name": "Z.ai", "provider_info": { @@ -160056,7 +159689,7 @@ "slug": "z-ai", "statusPageUrl": null }, - "provider_model_id": "glm-4.7", + "provider_model_id": "GLM-4.7-flashx", "provider_name": "Z.AI", "provider_region": null, "provider_slug": "z-ai", @@ -160079,32 +159712,24 @@ }, "features": { "chat_template_config": {}, - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - } + "reasoning_config": {} }, "group": "Other", "has_text_output": true, - "hf_slug": "zai-org/GLM-4.7", + "hf_slug": "zai-org/GLM-4.7-Flash", "hf_updated_at": null, "hidden": false, "input_modalities": ["text"], "instruct_type": null, "model_version_group_id": null, - "name": "Z.AI: GLM 4.7", + "name": "Z.AI: GLM 4.7 Flash", "output_modalities": ["text"], - "permaslug": "z-ai/glm-4.7-20251222", - "reasoning_config": { - "end_token": "", - "start_token": "", - "system_prompt": null - }, + "permaslug": "z-ai/glm-4.7-flash-20260119", + "reasoning_config": {}, "router": null, - "short_name": "GLM 4.7", - "slug": "z-ai/glm-4.7", - "updated_at": "2026-01-07T19:34:06.523149+00:00", + "short_name": "GLM 4.7 Flash", + "slug": "z-ai/glm-4.7-flash", + "updated_at": "2026-01-19T15:38:17.116015+00:00", "warning_message": null } ], @@ -160112,6 +159737,6 @@ "slug": "z-ai" } ], - "total_models": 778, - "total_providers": 62 + "total_models": 791, + "total_providers": 65 } diff --git a/src/lib/model-utils.ts b/src/lib/model-utils.ts index 3dfd451903..d8f9cfe72d 100644 --- a/src/lib/model-utils.ts +++ b/src/lib/model-utils.ts @@ -4,10 +4,9 @@ */ /** - * Normalize a model ID by removing the `:free` suffix if present. + * Normalize a model ID by removing the `:free`, `:exacto`, etc. suffixes if present. */ export function normalizeModelId(modelId: string): string { - return modelId.endsWith(':free') - ? modelId.substring(0, modelId.length - ':free'.length) - : modelId; + const colonIndex = modelId.indexOf(':'); + return colonIndex >= 0 ? modelId.substring(0, colonIndex) : modelId; } diff --git a/src/lib/models.test.ts b/src/lib/models.test.ts index 8da3e037a8..50ff40e4a3 100644 --- a/src/lib/models.test.ts +++ b/src/lib/models.test.ts @@ -9,11 +9,11 @@ describe('isFreeModel', () => { expect(isFreeModel('openrouter/sonoma-sky-alpha')).toBe(false); }); - test('should return false for models ending with :free', () => { - expect(isFreeModel('gpt-4:free')).toBe(false); - expect(isFreeModel('claude-3:free')).toBe(false); - expect(isFreeModel('some-model:free')).toBe(false); - expect(isFreeModel(':free')).toBe(false); + test('should return true for models ending with :free', () => { + expect(isFreeModel('gpt-4:free')).toBe(true); + expect(isFreeModel('claude-3:free')).toBe(true); + expect(isFreeModel('some-model:free')).toBe(true); + expect(isFreeModel(':free')).toBe(true); }); }); @@ -60,7 +60,7 @@ describe('isFreeModel', () => { expect(isFreeModel('sonic ')).toBe(false); expect(isFreeModel(' sonic ')).toBe(false); expect(isFreeModel('model:free ')).toBe(false); - expect(isFreeModel(' model:free')).toBe(false); + expect(isFreeModel(' model:free')).toBe(true); }); }); }); diff --git a/src/lib/models.ts b/src/lib/models.ts index a1a304e18b..3ca7f162b4 100644 --- a/src/lib/models.ts +++ b/src/lib/models.ts @@ -3,15 +3,11 @@ */ import { opus_46_free_slackbot_model } from '@/lib/providers/anthropic'; -import { arcee_trinity_large_preview_free_model } from '@/lib/providers/arcee'; import { corethink_free_model } from '@/lib/providers/corethink'; import { giga_potato_model } from '@/lib/providers/gigapotato'; import type { KiloFreeModel } from '@/lib/providers/kilo-free-model'; import { minimax_m21_free_model, minimax_m21_free_slackbot_model } from '@/lib/providers/minimax'; -import { devstral_2512_free_model, devstral_small_2512_free_model } from '@/lib/providers/mistral'; -import { pony_alpha_free_model } from '@/lib/providers/openrouter-free-models'; import { recommendedModels } from '@/lib/providers/recommended-models'; -import { kat_coder_pro_free_model } from '@/lib/providers/streamlake'; import { grok_code_fast_1_optimized_free_model } from '@/lib/providers/xai'; import { zai_glm47_free_model } from '@/lib/providers/zai'; @@ -25,26 +21,31 @@ export function getFirstFreeModel() { export const preferredModels = recommendedModels.map(m => m.public_id); +const freeOpenRouterModels = [ + 'openrouter/aurora-alpha', + 'openrouter/pony-alpha', + 'openrouter/free', +]; + export function isFreeModel(model: string): boolean { - return !!kiloFreeModels.find(m => m.public_id === model && m.is_enabled); + return ( + kiloFreeModels.some(m => m.public_id === model && m.is_enabled) || + (model ?? '').endsWith(':free') || + freeOpenRouterModels.includes(model) + ); } export function isDataCollectionRequiredOnKiloCodeOnly(model: string): boolean { - return isFreeModel(model); + return kiloFreeModels.some(m => m.public_id === model && m.is_enabled); } export const kiloFreeModels = [ - arcee_trinity_large_preview_free_model, corethink_free_model, - devstral_2512_free_model, - devstral_small_2512_free_model, giga_potato_model, - kat_coder_pro_free_model, minimax_m21_free_model, minimax_m21_free_slackbot_model, opus_46_free_slackbot_model, grok_code_fast_1_optimized_free_model, - pony_alpha_free_model, zai_glm47_free_model, ] as KiloFreeModel[]; diff --git a/src/lib/providers/arcee.ts b/src/lib/providers/arcee.ts deleted file mode 100644 index d98953dcb4..0000000000 --- a/src/lib/providers/arcee.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { type KiloFreeModel } from '@/lib/providers/kilo-free-model'; - -export const arcee_trinity_large_preview_free_model = { - public_id: 'arcee-ai/trinity-large-preview:free', - display_name: 'Arcee AI: Trinity Large Preview (free)', - description: - 'Trinity Large Preview is a state-of-the-art large language model from Arcee AI, optimized for coding and general-purpose assistance.', - context_length: 128000, - max_completion_tokens: 16384, - is_enabled: true, - flags: [], - gateway: 'openrouter', - internal_id: 'arcee-ai/trinity-large-preview:free', - inference_providers: ['arcee-ai'], -} as KiloFreeModel; diff --git a/src/lib/providers/mistral.ts b/src/lib/providers/mistral.ts index d557c1a15a..79a8b3fe4b 100644 --- a/src/lib/providers/mistral.ts +++ b/src/lib/providers/mistral.ts @@ -1,4 +1,3 @@ -import type { KiloFreeModel } from '@/lib/providers/kilo-free-model'; import type { OpenRouterChatCompletionRequest } from '@/lib/providers/openrouter/types'; import { dropToolStrictProperties, @@ -6,30 +5,6 @@ import { normalizeToolCallIds, } from '@/lib/tool-calling'; -export const devstral_2512_free_model = { - public_id: 'mistralai/devstral-2512:free', - display_name: 'Mistral: Devstral 2 2512 (free)', - description: - 'Devstral 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 123B-parameter dense transformer model supporting a 256K context window. It is provided free of charge in Kilo Code for a limited time.\n**Note:** prompts and completions may be logged by Mistral during the free period and used to improve the model.', - context_length: 262144, - max_completion_tokens: 262144, - is_enabled: false, - flags: [], - gateway: 'openrouter', - internal_id: 'mistralai/devstral-2512:free', - inference_providers: ['mistral'], -} as KiloFreeModel; - -export const devstral_small_2512_free_model = { - ...devstral_2512_free_model, - public_id: 'mistralai/devstral-small-2512:free', - display_name: 'Mistral: Devstral Small 2 2512 (free)', - description: - 'Devstral Small 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 24B-parameter dense transformer model supporting a 256K context window.\n**Note:** prompts and completions may be logged by Mistral during the free period and used to improve the model.', - gateway: 'vercel', - internal_id: 'mistral/devstral-small-2', -} as KiloFreeModel; - export function isMistralModel(model: string) { return model.startsWith('mistralai/'); } diff --git a/src/lib/providers/openrouter-free-models.ts b/src/lib/providers/openrouter-free-models.ts deleted file mode 100644 index 06965572b8..0000000000 --- a/src/lib/providers/openrouter-free-models.ts +++ /dev/null @@ -1,17 +0,0 @@ -import type { KiloFreeModel } from '@/lib/providers/kilo-free-model'; - -export const pony_alpha_free_model = { - public_id: 'openrouter/pony-alpha', - display_name: 'Pony Alpha (free)', - description: - 'Pony Alpha is a stealth model optimized for speed and enhanced reasoning capabilities. ' + - 'It is provided free of charge in Kilo Code for a limited time.\n' + - '**Note:** Prompts and completions are logged and may be used to improve the model.', - context_length: 200_000, - max_completion_tokens: 32_000, - is_enabled: true, - flags: ['reasoning'], - gateway: 'openrouter', - internal_id: 'openrouter/pony-alpha', - inference_providers: ['stealth'], -} as KiloFreeModel; diff --git a/src/lib/providers/openrouter/index.ts b/src/lib/providers/openrouter/index.ts index 59d546e4e4..4e091eda9a 100644 --- a/src/lib/providers/openrouter/index.ts +++ b/src/lib/providers/openrouter/index.ts @@ -22,10 +22,6 @@ import { // Re-export from shared module for backwards compatibility export { normalizeModelId } from '@/lib/model-utils'; -export function isRateLimitedToDeathFree(model: string) { - return model.endsWith(':free') && !isFreeModel(model); -} - function buildAutoModel(): OpenRouterModel { return { id: KILO_AUTO_MODEL_ID, @@ -60,7 +56,6 @@ function enhancedModelList(models: OpenRouterModel[]) { const enhancedModels = models .filter( (model: OpenRouterModel) => - !isRateLimitedToDeathFree(model.id) && !kiloFreeModels.some(m => m.public_id === model.id && m.is_enabled) ) .concat( @@ -74,9 +69,16 @@ function enhancedModelList(models: OpenRouterModel[]) { model.id === KILO_AUTO_MODEL_ID ? -1 : preferredModels.indexOf(model.id); const ageDays = (Date.now() / 1_000 - model.created) / (24 * 3600); const isNew = preferredIndex >= 0 && ageDays >= 0 && ageDays < 7; + const nameEndsWithParen = model.name.endsWith(')'); return { ...model, - name: isNew ? model.name + ' (new)' : model.name, + name: nameEndsWithParen + ? model.name + : isFreeModel(model.id) + ? model.name + ' (free)' + : isNew + ? model.name + ' (new)' + : model.name, preferredIndex: preferredIndex >= 0 || model.id === KILO_AUTO_MODEL_ID ? preferredIndex : undefined, settings: getModelSettings(model.id), diff --git a/src/lib/providers/openrouter/sync-providers.ts b/src/lib/providers/openrouter/sync-providers.ts index af8023c503..05b6a7158b 100644 --- a/src/lib/providers/openrouter/sync-providers.ts +++ b/src/lib/providers/openrouter/sync-providers.ts @@ -1,6 +1,6 @@ import pLimit from 'p-limit'; import { kiloFreeModels } from '@/lib/models'; -import { isRateLimitedToDeathFree, normalizeModelId } from '@/lib/providers/openrouter'; +import { normalizeModelId } from '@/lib/providers/openrouter'; import { convertFromKiloModel } from '@/lib/providers/kilo-free-model'; import type { NormalizedOpenRouterResponse, @@ -277,18 +277,10 @@ export async function syncProviders() { // Create simplified structure with providers containing their models directly const normalizedProviders: NormalizedProvider[] = filteredProviderModelData.map(data => { - // Deduplicate models within each provider by slug and filter out free models + // Deduplicate models within each provider by slug const uniqueModelsMap = new Map(); data.models.forEach(model => { - // Skip the typically rate-limited-to-death free models - if ( - !( - isRateLimitedToDeathFree(model.endpoint?.model_variant_slug.toLowerCase() ?? '') || - isRateLimitedToDeathFree(model.slug.toLowerCase()) - ) - ) { - uniqueModelsMap.set(model.slug, model); - } + uniqueModelsMap.set(normalizeModelId(model.slug), model); }); const uniqueModels = Array.from(uniqueModelsMap.values()); diff --git a/src/lib/providers/recommended-models.ts b/src/lib/providers/recommended-models.ts index 96fa8389ed..bd906f1bdb 100644 --- a/src/lib/providers/recommended-models.ts +++ b/src/lib/providers/recommended-models.ts @@ -2,7 +2,6 @@ import type { ModelSettings, VersionedSettings } from '@/lib/organizations/organ import { KILO_AUTO_MODEL_ID } from '@/lib/kilo-auto-model'; import { giga_potato_model } from '@/lib/providers/gigapotato'; import { minimax_m21_free_model } from '@/lib/providers/minimax'; -import { pony_alpha_free_model } from '@/lib/providers/openrouter-free-models'; import { zai_glm47_free_model } from '@/lib/providers/zai'; import { grok_code_fast_1_optimized_free_model } from '@/lib/providers/xai'; @@ -36,7 +35,7 @@ export const recommendedModels = [ random_vercel_routing: false, }, { - public_id: pony_alpha_free_model.public_id, + public_id: 'openrouter/pony-alpha', tool_choice_required: false, random_vercel_routing: false, }, diff --git a/src/lib/providers/streamlake.ts b/src/lib/providers/streamlake.ts deleted file mode 100644 index b702557558..0000000000 --- a/src/lib/providers/streamlake.ts +++ /dev/null @@ -1,15 +0,0 @@ -import { type KiloFreeModel } from '@/lib/providers/kilo-free-model'; - -export const kat_coder_pro_free_model = { - public_id: 'kwaipilot/kat-coder-pro:free', - display_name: 'Kwaipilot: KAT-Coder-Pro V1 (free)', - description: `KAT-Coder-Pro V1 is KwaiKAT's most advanced agentic coding model in the KwaiKAT series. It excels in real-world software engineering scenarios and has been rigorously tested by thousands of engineers. It achieves a 73.4% solve rate on the SWE-Bench Verified benchmark, hits 64 on the Artificial Analysis Intelligence Index, and ranks 10th globally among all models, as well as 1st among non-reasoning models! -The model has been optimized for tool-use capability, multi-turn interaction, instruction following, generalization and comprehensive capabilities through a multi-stage training process, including mid-training, supervised fine-tuning, reinforcement fine-tuning, and scalable agentic RL.`, - context_length: 256000, - max_completion_tokens: 32768, - is_enabled: false, - flags: ['reasoning', 'prompt_cache'], - gateway: 'streamlake', - internal_id: 'ep-4makks-1765348062249697557', - inference_providers: ['streamlake'], -} as KiloFreeModel; diff --git a/src/tests/openrouter-models-sorting.approved.json b/src/tests/openrouter-models-sorting.approved.json index 3625343cdd..0e4846e6cc 100644 --- a/src/tests/openrouter-models-sorting.approved.json +++ b/src/tests/openrouter-models-sorting.approved.json @@ -144,49 +144,6 @@ } } }, - { - "id": "openrouter/pony-alpha", - "canonical_slug": "openrouter/pony-alpha", - "hugging_face_id": "", - "name": "Pony Alpha (free)", - "created": 1756238927, - "description": "Pony Alpha is a stealth model optimized for speed and enhanced reasoning capabilities. It is provided free of charge in Kilo Code for a limited time.\n**Note:** Prompts and completions are logged and may be used to improve the model.", - "context_length": 200000, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000000", - "completion": "0.0000000", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "top_provider": { - "context_length": 200000, - "max_completion_tokens": 32000, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "max_tokens", - "temperature", - "tools", - "reasoning", - "include_reasoning" - ], - "default_parameters": {}, - "preferredIndex": 4 - }, { "id": "giga-potato", "canonical_slug": "giga-potato", @@ -243,47 +200,6 @@ } } }, - { - "id": "arcee-ai/trinity-large-preview:free", - "canonical_slug": "arcee-ai/trinity-large-preview:free", - "hugging_face_id": "", - "name": "Arcee AI: Trinity Large Preview (free)", - "created": 1756238927, - "description": "Trinity Large Preview is a state-of-the-art large language model from Arcee AI, optimized for coding and general-purpose assistance.", - "context_length": 128000, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000000", - "completion": "0.0000000", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "top_provider": { - "context_length": 128000, - "max_completion_tokens": 16384, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "max_tokens", - "temperature", - "tools" - ], - "default_parameters": {}, - "preferredIndex": 6 - }, { "id": "anthropic/claude-sonnet-4", "name": "Claude Sonnet 4",