diff --git a/recipes/deepseek-r1/vllm/disagg/deploy_hopper_16gpu.yaml b/recipes/deepseek-r1/vllm/disagg/deploy_hopper_16gpu.yaml index 1395065d4b8..4b864b950c1 100644 --- a/recipes/deepseek-r1/vllm/disagg/deploy_hopper_16gpu.yaml +++ b/recipes/deepseek-r1/vllm/disagg/deploy_hopper_16gpu.yaml @@ -87,7 +87,7 @@ spec: --enable-eplb \ --eplb-config '{"window_size":"1000","step_interval":"3000","num_redundant_experts":"32","log_balancedness":"False"}' \ --max-num-seqs 512 \ - --compilation_config '{"pass_config":{"enable_fusion":true,"enable_attn_fusion":true,"enable_noop":true},"custom_ops":["+rms_norm"],"cudagraph_mode":"FULL_DECODE_ONLY"}' + --compilation_config '{"pass_config":{"fuse_norm_quant":true,"eliminate_noops":true},"cudagraph_mode":"FULL_DECODE_ONLY"}' prefill: componentType: worker subComponentType: prefill