Skip to content

Commit f9f2f8d

Browse files
committed
remove lambda, scale for fair comparison
1 parent 50ac2cc commit f9f2f8d

File tree

1 file changed

+6
-8
lines changed

1 file changed

+6
-8
lines changed

benchmarks/benchmark_e2e_fp8_sparse_linear.py

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -41,18 +41,16 @@ def benchmark(num_tokens, hidden_size=8192, intermediate_size=8192):
4141
fp16_time = benchmark_microseconds(ffn_ref, input_tensor)
4242

4343
# Sparsify-only benchmarks
44-
X_scale = torch.empty([num_tokens, 1], device="cuda", dtype=torch.float32)
45-
ao_cusparse_time = benchmark_microseconds(
46-
lambda: torch.ops.torchao.sparse24_sm90_sparsify(
44+
ao_fast_sparsification_time = benchmark_microseconds(
45+
torch.ops.torchao.sparse24_sm90_sparsify(
4746
input_tensor,
4847
"cutlass",
4948
"srelu",
5049
"largest",
5150
dtype=torch.float8_e4m3fn,
52-
scale=X_scale,
5351
)
5452
)
55-
cusparse_time = benchmark_microseconds(lambda: torch._cslt_compress(input_tensor))
53+
cusparse_time = benchmark_microseconds(torch._cslt_compress, input_tensor)
5654

5755
# bf16
5856
ffn_clone = (
@@ -131,10 +129,10 @@ def benchmark(num_tokens, hidden_size=8192, intermediate_size=8192):
131129
"fp8_c_time (us)": fp8_c_time,
132130
"fp8_c_sparse_time (us)": fp8_c_sparse_time,
133131
"fp8_c_activation_sparse_time (us)": fp8_c_activation_sparse_time,
134-
"ao_cusparse_time (us)": ao_cusparse_time,
135-
"cusparse_compress_time (us)": cusparse_time,
132+
"ao_fast_sparsification_time (us)": ao_fast_sparsification_time,
133+
"cusparse*_compress_time (us)": cusparse_time,
136134
"speedup": fp8_c_time / fp8_c_activation_sparse_time,
137-
"sparsify_speedup": cusparse_time / ao_cusparse_time,
135+
"sparsify_speedup": cusparse_time / ao_fast_sparsification_time,
138136
}
139137

140138

0 commit comments

Comments
 (0)