Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions src/components/autoregressive/Qwen35ConfigGenerator/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,29 +50,28 @@ const Qwen35ConfigGenerator = () => {
{ id: 'disabled', label: 'Disabled', default: false },
{ id: 'enabled', label: 'Enabled', default: true }
],
commandRule: (value) => value === 'enabled' ? '--speculative-algo NEXTN \\\n --speculative-num-steps 3 \\\n --speculative-eagle-topk 1 \\\n --speculative-num-draft-tokens 4' : null
commandRule: (value) => value === 'enabled' ? '--speculative-algorithm EAGLE \\\n --speculative-num-steps 3 \\\n --speculative-eagle-topk 1 \\\n --speculative-num-draft-tokens 4' : null
}
},

modelConfigs: {
h100: { bf16: { tp: 16, mem: 0.8 } },
h200: { bf16: { tp: 8, mem: 0.8 } },
b200: { bf16: { tp: 8, mem: 0.8 } }
b200: { bf16: { tp: 8, mem: 0.82 } }
},

generateCommand: function (values) {
const { hardware } = values;
const { hardware, speculative } = values;

const modelName = `${this.modelFamily}/Qwen3.5-397B-A17B`;

const hwConfig = this.modelConfigs[hardware].bf16;
const tpValue = hwConfig.tp;
const memFraction = hwConfig.mem;

// Initialize the base command
let cmd = 'python -m sglang.launch_server \\\n';
cmd += ` --model ${modelName}`;

// TP setting
cmd += ` \\\n --tp ${tpValue}`;

// Apply commandRule from all options
Expand All @@ -85,7 +84,18 @@ const Qwen35ConfigGenerator = () => {
}
});

// Memory fraction based on hardware
// Append B200-specific backend configurations
if (hardware === 'b200') {
cmd += ` \\\n --attention-backend trtllm_mha`;
cmd += ` \\\n --moe-runner-backend flashinfer_trtllm`;
cmd += ` \\\n --disable-radix-cache`;
cmd += ` \\\n --enable-flashinfer-allreduce-fusion`;
if (speculative === 'disabled') {
cmd += ` \\\n --tokenizer-worker-num 6`;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious, how did you arrive at the number 6?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reference: https://github.com/SemiAnalysisAI/InferenceX/pull/758/changes (see line 56 of benchmarks/single_node/qwen3.5_bf16_b200.sh). Let me know if there are any issues or suggestions!

}
}

// Add memory fraction
cmd += ` \\\n --mem-fraction-static ${memFraction}`;

return cmd;
Expand Down