diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py index 830a6752cd62..243af5087f4d 100644 --- a/python/sglang/srt/layers/quantization/fp8.py +++ b/python/sglang/srt/layers/quantization/fp8.py @@ -1382,7 +1382,6 @@ def apply_with_router_logits( if routed_scaling_factor is not None else 1.0 ), - tile_tokens_dim=None, routing_method_type=routing_method_type, use_shuffled_weight=False, tune_max_num_tokens=next_power_of_2(a_q.shape[0]), diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py index d4faa2ddf55e..74fdc22f59ce 100755 --- a/python/sglang/srt/layers/quantization/modelopt_quant.py +++ b/python/sglang/srt/layers/quantization/modelopt_quant.py @@ -785,7 +785,6 @@ def apply( else 1.0 ), use_routing_scales_on_input=use_routing_scales_on_input, - tile_tokens_dim=None, routing_method_type=routing_method_type, tune_max_num_tokens=next_power_of_2(x.shape[0]), )