@@ -219,9 +219,9 @@ def main(
219219if __name__ == "__main__" :
220220 # Parse command-line arguments for matrix dimensions
221221 parser = argparse .ArgumentParser (description = "Autotuned MatMul Benchmark" )
222- parser .add_argument ("--m" , type = int , default = 16384 , help = "Matrix dimension M" )
223- parser .add_argument ("--n" , type = int , default = 16384 , help = "Matrix dimension N" )
224- parser .add_argument ("--k" , type = int , default = 16384 , help = "Matrix dimension K" )
222+ parser .add_argument ("--m" , type = int , default = 8192 , help = "Matrix dimension M" )
223+ parser .add_argument ("--n" , type = int , default = 8192 , help = "Matrix dimension N" )
224+ parser .add_argument ("--k" , type = int , default = 8192 , help = "Matrix dimension K" )
225225 parser .add_argument (
226226 "--with_roller" ,
227227 action = "store_true" ,
@@ -237,13 +237,11 @@ def main(
237237
238238 # matmul(...) returns a result object; the code below reads its .latency and .config attributes and calls get_kernel_source() on it
239239 best_result = matmul (M , N , K , with_roller )
240+ print (best_result .get_kernel_source ())
240241 best_latency = best_result .latency
241242 best_config = best_result .config
242- ref_latency = best_result .ref_latency
243243
244244 # Print out the benchmark results
245245 print (f"Best latency (s): { best_latency } " )
246246 print (f"Best TFlops: { total_flops / best_latency * 1e-9 :.3f} " )
247247 print (f"Best config: { best_config } " )
248-
249- print (f"Reference TFlops: { total_flops / ref_latency * 1e-9 :.3f} " )
0 commit comments