Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aiter/configs/tuned_gemm.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
M,N,K,bias,dtype,outdtype,scaleAB,cu_num,libtype,solidx,splitK,soltimes,kernelName,err_ratio,tflops,bw
M,N,K,bias,dtype,outdtype,scaleAB,cu_num,libtype,solidx,splitK,us,kernelName,err_ratio,tflops,bw
64,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,1,10,8.3,_ZN5aiter28bf16gemm_outf32_tn_32x64_pf3E,0.0,20.21,402.69
80,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,2,10,8.8,_ZN5aiter28bf16gemm_outf32_tn_48x64_pf3E,0.0,23.83,400.29
128,256,5120,False,torch.bfloat16,torch.float32,False,80,asm,3,10,11.2,_ZN5aiter28bf16gemm_outf32_tn_64x64_pf3E,0.0,29.96,362.79
Expand Down
7 changes: 5 additions & 2 deletions aiter/utility/base_tuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import torch
import pandas as pd

from abc import ABC, abstractmethod
from abc import abstractmethod
from aiter import logger
import traceback
from operator import itemgetter
Expand Down Expand Up @@ -37,6 +37,7 @@ class TunerCommon:
dtypes.fp4x2: 1,
torch.uint8: 1,
torch.uint32: 4,
dtypes.fp32: 4,
torch.int4: 1 / 2,
torch.float8_e4m3fnuz: 1,
torch.float8_e4m3fn: 1,
Expand Down Expand Up @@ -76,13 +77,15 @@ def _setup_common_arguments(self):
"-i",
"--untune_file",
default=defaults["untune_file"],
dest="untune_file",
required=False,
help="input",
)
self.parser.add_argument(
"-o",
"--tune_file",
default=defaults["tune_file"],
dest="tune_file",
required=False,
help="output: tuning result store this file",
)
Expand Down Expand Up @@ -494,7 +497,7 @@ def result_to_csv(self, resultdf, file, concat=False):
[self.success, resultdf[resultdf["us"] != self.INVALID_TIME]],
ignore_index=True,
)
update_tunedf = self.success
update_tunedf = resultdf[resultdf["us"] != self.INVALID_TIME] # self.success
if not concat:
resultdf = self.update_tunedf(old_df, update_tunedf)
else:
Expand Down
Loading