diff --git a/python/triton/ops/matmul_perf_model.py b/python/triton/ops/matmul_perf_model.py
index 1e07b0a029bb..19e93268ec0b 100644
--- a/python/triton/ops/matmul_perf_model.py
+++ b/python/triton/ops/matmul_perf_model.py
@@ -12,7 +12,7 @@ def get_tensorcore_tflops(backend, device, num_ctas, num_warps, dtype):
     ''' return compute throughput in TOPS '''
     total_warps = num_ctas * min(num_warps, 4)
     num_subcores = driver.utils.get_device_properties(device)["multiprocessor_count"] * 4  # on recent GPUs
-    cur_sm_clock = nvsmi(['clocks.current.sm'])[0]
+    cur_sm_clock = nvsmi(['clocks.max.sm'])[0]
     tflops = min(num_subcores, total_warps) / num_subcores * get_max_tensorcore_tflops(
         dtype, cur_sm_clock, backend, device)
     return tflops
@@ -22,7 +22,7 @@ def get_simd_tflops(backend, device, num_ctas, num_warps, dtype):
     ''' return compute throughput in TOPS '''
     total_warps = num_ctas * min(num_warps, 4)
     num_subcores = driver.utils.get_device_properties(device)["multiprocessor_count"] * 4  # on recent GPUs
-    cur_sm_clock = nvsmi(['clocks.current.sm'])[0]
+    cur_sm_clock = nvsmi(['clocks.max.sm'])[0]
     tflops = min(num_subcores, total_warps) / num_subcores * get_max_simd_tflops(dtype, cur_sm_clock, backend, device)
     return tflops