diff --git a/models/JetBrains/Mellum2-12B-A2.5B-Instruct.yaml b/models/JetBrains/Mellum2-12B-A2.5B-Instruct.yaml new file mode 100644 index 00000000..c5d33315 --- /dev/null +++ b/models/JetBrains/Mellum2-12B-A2.5B-Instruct.yaml @@ -0,0 +1,121 @@ +meta: + title: "Mellum2-12B-A2.5B-Instruct" + slug: "mellum2-12b-a2.5b-instruct" + provider: "JetBrains" + description: "JetBrains' instruction-tuned code MoE (12B total / 2.5B active) that answers directly without an externalized chain of thought — low-latency coding and tool use" + date_updated: 2026-06-02 + difficulty: intermediate + tasks: + - text + performance_headline: "78.4 EvalPlus, 67.1 MultiPL-E — direct answers, fits on a single GPU" + related_recipes: + - "JetBrains/Mellum2-12B-A2.5B-Thinking" + hardware: + h200: verified + +model: + model_id: "JetBrains/Mellum2-12B-A2.5B-Instruct" + min_vllm_version: "nightly" + nightly_required: true + architecture: moe + parameter_count: "12B" + active_parameters: "2.5B" + context_length: 131072 + base_args: + - "--max-model-len" + - "131072" + base_env: {} + +features: + tool_calling: + description: "Hermes tool-call parser for function calling" + args: + - "--enable-auto-tool-choice" + - "--tool-call-parser" + - "hermes" + +opt_in_features: [] + +variants: + default: + precision: bf16 + vram_minimum_gb: 29 + description: "Native bfloat16 weights; fits comfortably on a single H200/H100/A100" + +compatible_strategies: + - single_node_tp + - single_node_tep + - single_node_dep + +strategy_overrides: + single_node_tp: + tp: 1 + +guide: | + ## Overview + + [Mellum2-12B-A2.5B-Instruct](https://huggingface.co/JetBrains/Mellum2-12B-A2.5B-Instruct) + is JetBrains' instruction-tuned code assistant. 
It shares the same Mixture-of-Experts + backbone as the rest of the Mellum2 family — 64 experts (8 activated per token), 12B total + / 2.5B active parameters, sliding-window + full-attention layers, 131,072-token context — + but is post-trained (SFT + RLVR on math, coding, tool use, instruction following, reasoning, + and knowledge) to **answer directly, without an externalized chain of thought**. For complex + debugging, multi-step planning, or math/reasoning-heavy tasks where you want explicit + reasoning traces, use the + [Thinking](https://huggingface.co/JetBrains/Mellum2-12B-A2.5B-Thinking) variant instead. + + ## Prerequisites + + - Hardware: a single H200, H100, or A100 (~29 GB at bf16) is plenty + - vLLM **nightly** — `MellumForCausalLM` support landed after v0.22.0 and is not yet in a + stable release. Install the nightly wheels until the next tagged release ships. + + ### Install vLLM (nightly) + + ```bash + uv venv + source .venv/bin/activate + uv pip install -U vllm --extra-index-url https://wheels.vllm.ai/nightly + ``` + + ## Launch command + + Unlike the Thinking checkpoint, Instruct does not emit `<think>` blocks, so no + `--reasoning-parser` is needed. + + ```bash + # Plain serving + vllm serve JetBrains/Mellum2-12B-A2.5B-Instruct \ + --max-model-len 131072 + + # Add tool calling + vllm serve JetBrains/Mellum2-12B-A2.5B-Instruct \ + --max-model-len 131072 \ + --enable-auto-tool-choice \ + --tool-call-parser hermes + ``` + + ## Client usage + + JetBrains recommends sampling at `temperature=0.6`, `top_p=0.95`, `top_k=20`. 
+ + ```python + from openai import OpenAI + client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY") + + resp = client.chat.completions.create( + model="JetBrains/Mellum2-12B-A2.5B-Instruct", + messages=[{"role": "user", "content": "Write a Python function to reverse a string."}], + max_tokens=81920, + temperature=0.6, + top_p=0.95, + extra_body={"top_k": 20}, + ) + print(resp.choices[0].message.content) + ``` + + ## References + + - [Model card](https://huggingface.co/JetBrains/Mellum2-12B-A2.5B-Instruct) + - [vLLM support PR #43992](https://github.com/vllm-project/vllm/pull/43992) + - [Mellum2 Technical Report](https://arxiv.org/abs/2605.31268) diff --git a/models/JetBrains/Mellum2-12B-A2.5B-Thinking.yaml b/models/JetBrains/Mellum2-12B-A2.5B-Thinking.yaml new file mode 100644 index 00000000..59d2e604 --- /dev/null +++ b/models/JetBrains/Mellum2-12B-A2.5B-Thinking.yaml @@ -0,0 +1,125 @@ +meta: + title: "Mellum2-12B-A2.5B-Thinking" + slug: "mellum2-12b-a2.5b-thinking" + provider: "JetBrains" + description: "JetBrains' reasoning-augmented code MoE (12B total / 2.5B active) that emits explicit <think> chains for debugging, planning, and agentic coding" + date_updated: 2026-06-02 + difficulty: intermediate + tasks: + - text + performance_headline: "69.9 LiveCodeBench v6, 58.4 AIME — fits on a single GPU" + related_recipes: + - "JetBrains/Mellum2-12B-A2.5B-Instruct" + hardware: + h200: verified + +model: + model_id: "JetBrains/Mellum2-12B-A2.5B-Thinking" + min_vllm_version: "nightly" + nightly_required: true + architecture: moe + parameter_count: "12B" + active_parameters: "2.5B" + context_length: 131072 + base_args: + - "--max-model-len" + - "131072" + base_env: {} + +features: + reasoning: + description: "Parse the model's <think>...</think> 
reasoning blocks (Qwen3-style parser)" + args: + - "--reasoning-parser" + - "qwen3" + tool_calling: + description: "Hermes tool-call parser for function calling" + args: + - "--enable-auto-tool-choice" + - "--tool-call-parser" + - "hermes" + +opt_in_features: [] + +variants: + default: + precision: bf16 + vram_minimum_gb: 29 + description: "Native bfloat16 weights; fits comfortably on a single H200/H100/A100" + +compatible_strategies: + - single_node_tp + - single_node_tep + - single_node_dep + +strategy_overrides: + single_node_tp: + tp: 1 + +guide: | + ## Overview + + [Mellum2-12B-A2.5B-Thinking](https://huggingface.co/JetBrains/Mellum2-12B-A2.5B-Thinking) + is JetBrains' reasoning-augmented code assistant. It uses a Mixture-of-Experts architecture + with 64 experts (8 activated per token) — 12B total parameters, 2.5B active — combining + sliding-window and full-attention layers for a 131,072-token context. The model emits its + chain-of-thought inside `<think>...</think>` blocks before the final answer, making it + suited to complex debugging, multi-step planning, and agentic workflows. For direct, + low-latency answers without reasoning traces, use the + [Instruct](https://huggingface.co/JetBrains/Mellum2-12B-A2.5B-Instruct) variant instead. + + ## Prerequisites + + - Hardware: a single H200, H100, or A100 (~29 GB at bf16) is plenty + - vLLM **nightly** — `MellumForCausalLM` support landed after v0.22.0 and is not yet in a + stable release. Install the nightly wheels until the next tagged release ships. 
+ + ### Install vLLM (nightly) + + ```bash + uv venv + source .venv/bin/activate + uv pip install -U vllm --extra-index-url https://wheels.vllm.ai/nightly + ``` + + ## Launch command + + ```bash + # With reasoning (recommended for the Thinking checkpoint) + vllm serve JetBrains/Mellum2-12B-A2.5B-Thinking \ + --max-model-len 131072 \ + --reasoning-parser qwen3 + + # Add tool calling + vllm serve JetBrains/Mellum2-12B-A2.5B-Thinking \ + --max-model-len 131072 \ + --reasoning-parser qwen3 \ + --enable-auto-tool-choice \ + --tool-call-parser hermes + ``` + + ## Client usage + + JetBrains recommends sampling at `temperature=0.6`, `top_p=0.95`, `top_k=20` for the + Thinking checkpoint. + + ```python + from openai import OpenAI + client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY") + + resp = client.chat.completions.create( + model="JetBrains/Mellum2-12B-A2.5B-Thinking", + messages=[{"role": "user", "content": "Is 1024 a power of 2? Explain your reasoning."}], + max_tokens=81920, + temperature=0.6, + top_p=0.95, + extra_body={"top_k": 20}, + ) + print(resp.choices[0].message.content) + ``` + + ## References + + - [Model card](https://huggingface.co/JetBrains/Mellum2-12B-A2.5B-Thinking) + - [vLLM support PR #43992](https://github.com/vllm-project/vllm/pull/43992) + - [Mellum2 Technical Report](https://arxiv.org/abs/2605.31268) diff --git a/src/lib/providers.js b/src/lib/providers.js index 89758e7b..d77930e8 100644 --- a/src/lib/providers.js +++ b/src/lib/providers.js @@ -36,6 +36,7 @@ export const PROVIDERS = { "stabilityai": { display_name: "Stability AI", logo: "/providers/stabilityai.png" }, "stepfun-ai": { display_name: "StepFun", logo: "/providers/stepfun-ai.png" }, "poolside": { display_name: "Poolside", logo: "/providers/poolside.png" }, + "JetBrains": { display_name: "JetBrains", logo: "/providers/JetBrains.png" }, }; export function getProviderLogo(hfOrg) {