Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 0 additions & 39 deletions aiter/configs/a8w8_blockscale_tuned_gemm.csv
Original file line number Diff line number Diff line change
Expand Up @@ -117,122 +117,83 @@ cu_num,M,N,K,kernelId,splitK,us,kernelName,tflops,bw,errRatio
304,20480,512,7168,2,0,256.7931,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,585.39,667.63,0.0
304,20480,4096,512,0,0,183.337,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,468.53,983.74,0.0
256,16,1536,7168,8,0,20.8535,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,16.9,535.83,0.0
256,16,3072,1536,8,0,7.66,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,19.71,632.05,0.0
256,16,576,7168,8,0,19.9031,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,6.64,214.13,0.0
256,16,7168,256,8,0,3.6287,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,16.18,570.03,0.0
256,16,7168,2048,8,0,8.0688,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,58.22,1851.85,0.0
256,16,4608,7168,8,0,21.2231,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,49.8,1568.68,0.0
256,16,7168,2304,8,0,8.6748,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,60.92,1934.49,0.0
256,16,512,7168,8,0,19.9419,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,5.89,190.61,0.0
256,16,4096,512,13,0,3.5903,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,18.69,622.91,0.0
256,32,1536,7168,8,0,20.7843,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,33.9,545.49,0.0
256,32,3072,1536,8,0,7.8864,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,38.29,629.48,0.0
256,32,576,7168,8,0,19.9519,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,13.24,220.28,0.0
256,32,7168,256,7,0,3.6839,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,31.88,624.87,0.0
256,32,7168,2048,8,0,8.2088,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,114.45,1852.2,0.0
256,32,4608,7168,8,0,21.1971,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,99.73,1582.97,0.0
256,32,7168,2304,8,0,8.942,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,118.2,1906.46,0.0
256,32,512,7168,8,0,19.9219,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,11.79,197.38,0.0
256,32,4096,512,7,0,3.8435,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,34.92,618.1,0.0
256,64,1536,7168,8,0,20.5651,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,68.53,567.24,0.0
256,64,3072,1536,18,0,8.078,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,74.77,644.98,0.0
256,64,576,7168,8,0,19.7455,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,26.76,236.07,0.0
256,64,7168,256,11,0,3.7547,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,62.56,737.45,0.0
256,64,7168,2048,7,0,9.2928,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,202.2,1692.56,0.0
256,64,4608,7168,7,0,24.0952,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,175.46,1414.34,0.0
256,64,7168,2304,7,0,10.0892,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,209.52,1742.46,0.0
256,64,512,7168,8,0,19.6915,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,23.86,213.0,0.0
256,64,4096,512,13,0,4.0395,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,66.45,657.06,0.0
256,128,1536,7168,8,0,20.1019,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,140.21,612.92,0.0
256,128,3072,1536,7,0,7.8296,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,154.28,728.21,0.0
256,128,576,7168,8,0,19.3303,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,54.68,268.68,0.0
256,128,7168,256,7,0,3.9827,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,117.95,929.72,0.0
256,128,7168,2048,18,0,10.1248,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,371.18,1657.04,0.0
256,128,4608,7168,18,0,24.7832,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,341.19,1417.38,0.0
256,128,7168,2304,18,0,11.0009,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,384.32,1694.86,0.0
256,128,512,7168,8,0,19.0891,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,49.22,247.19,0.0
256,128,4096,512,8,0,4.3587,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,123.17,736.75,0.0
256,256,1536,7168,7,0,23.4904,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,239.98,580.3,0.0
256,256,3072,1536,12,0,8.9712,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,269.3,745.13,0.0
256,256,576,7168,8,0,19.6847,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,107.39,317.95,0.0
256,256,7168,256,18,0,5.6643,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,165.87,983.45,0.0
256,256,7168,2048,18,0,14.8749,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,505.29,1268.87,0.0
256,256,4608,7168,18,0,36.4007,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,464.59,1022.63,0.0
256,256,7168,2304,18,0,15.9582,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,529.87,1301.83,0.0
256,256,512,7168,8,0,19.4919,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,96.4,295.88,0.0
256,256,4096,512,13,0,5.096,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,210.7,848.78,0.0
256,512,1536,7168,18,0,24.5324,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,459.57,662.51,0.0
256,512,3072,1536,18,0,11.8645,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,407.25,729.13,0.0
256,512,576,7168,18,0,23.3536,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,181.04,359.2,0.0
256,512,7168,256,13,0,8.214,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,228.76,1132.96,0.0
256,512,7168,2048,18,0,25.7748,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,583.22,895.01,0.0
256,512,4608,7168,18,0,58.822,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,575.0,704.14,0.0
256,512,7168,2304,18,0,28.4393,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,594.65,880.29,0.0
256,512,512,7168,8,0,20.1543,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,186.47,390.21,0.0
256,512,4096,512,3,0,8.2148,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,261.42,797.78,0.0
256,1024,1536,7168,18,0,36.4647,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.37,589.5,0.0
256,1024,3072,1536,18,0,17.5794,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,549.72,715.78,0.0
256,1024,576,7168,18,0,23.9404,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,353.2,528.33,0.0
256,1024,7168,256,16,0,12.6589,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,296.87,1325.33,0.0
256,1024,7168,2048,0,0,46.861,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,641.57,671.29,0.0
256,1024,4608,7168,18,0,99.4691,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,680.07,500.73,0.0
256,1024,7168,2304,0,0,49.965,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,676.93,671.56,0.0
256,1024,512,7168,7,0,23.5068,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,319.75,512.98,0.0
256,1024,4096,512,18,0,11.0785,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,387.68,993.82,0.0
256,1536,1536,7168,18,0,58.096,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,582.19,460.25,0.0
256,1536,3072,1536,18,0,25.5804,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,566.66,645.61,0.0
256,1536,576,7168,18,0,25.1476,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,504.37,672.36,0.0
256,1536,7168,256,18,0,16.5106,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,341.43,1468.65,0.0
256,1536,7168,2048,0,0,66.7266,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.85,597.15,0.0
256,1536,4608,7168,18,0,139.826,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,725.68,416.2,0.0
256,1536,7168,2304,0,0,73.8284,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.19,569.89,0.0
256,1536,512,7168,18,0,24.7692,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,455.17,656.17,0.0
256,1536,4096,512,18,0,14.5922,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,441.5,1059.92,0.0
256,2048,1536,7168,18,0,64.5154,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,699.01,495.72,0.0
256,2048,3072,1536,2,0,31.1726,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,620.01,655.94,0.0
256,2048,576,7168,18,0,35.1627,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,480.95,602.01,0.0
256,2048,7168,256,11,0,21.3779,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,351.59,1483.75,0.0
256,2048,7168,2048,0,0,84.8351,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,708.78,568.57,0.0
256,2048,4608,7168,18,0,180.0698,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,751.33,369.77,0.0
256,2048,7168,2304,0,0,92.2125,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,733.59,548.67,0.0
256,2048,512,7168,18,0,25.826,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,582.06,791.73,0.0
256,2048,4096,512,18,0,18.1866,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,472.32,1095.47,0.0
256,4096,1536,7168,18,0,124.0493,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,727.08,426.87,0.0
256,4096,3072,1536,0,0,56.5736,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.26,639.45,0.0
256,4096,576,7168,18,0,57.2232,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,591.07,667.69,0.0
256,4096,7168,256,18,0,37.2575,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,403.47,1653.46,0.0
256,4096,7168,2048,0,0,150.9763,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.54,541.73,0.0
256,4096,4608,7168,0,0,310.9487,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,870.19,322.04,0.0
256,4096,7168,2304,0,0,161.8222,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.05,523.24,0.0
256,4096,512,7168,18,0,44.5137,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,675.4,836.25,0.0
256,4096,4096,512,18,0,31.979,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,537.22,1180.42,0.0
256,8192,1536,7168,0,0,210.661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,856.3,450.47,0.0
256,8192,3072,1536,0,0,103.7592,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.08,651.83,0.0
256,8192,576,7168,18,0,101.6527,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,665.46,711.11,0.0
256,8192,7168,256,17,0,73.9508,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,406.55,1641.26,0.0
256,8192,7168,2048,0,0,269.0296,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,894.02,553.46,0.0
256,8192,4608,7168,0,0,539.5795,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1002.94,309.96,0.0
256,8192,7168,2304,0,0,285.6152,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,947.37,535.09,0.0
256,8192,512,7168,18,0,86.0755,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,698.57,822.29,0.0
256,8192,4096,512,18,0,63.5698,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,540.5,1154.64,0.0
256,16384,1536,7168,0,0,371.4478,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,971.27,481.31,0.0
256,16384,3072,1536,0,0,186.8656,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.43,698.62,0.0
256,16384,576,7168,18,0,186.7632,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,724.4,751.99,0.0
256,16384,7168,256,17,0,141.6637,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,424.45,1700.58,0.0
256,16384,7168,2048,0,0,508.5984,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,945.81,556.66,0.0
256,16384,4608,7168,0,0,1046.6928,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1034.05,288.02,0.0
256,16384,7168,2304,0,0,539.2895,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1003.48,536.16,0.0
256,16384,512,7168,0,0,143.7706,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.47,959.08,0.0
256,16384,4096,512,0,0,119.5724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,574.71,1210.17,0.0
256,20480,1536,7168,0,0,464.4401,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,971.0,475.25,0.0
256,20480,3072,1536,0,0,223.3253,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,865.43,725.42,0.0
256,20480,576,7168,18,0,230.9895,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,732.13,755.54,0.0
256,20480,7168,256,18,0,167.1547,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,449.65,1798.81,0.0
256,20480,7168,2048,0,0,630.5854,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,953.55,555.4,0.0
256,20480,4608,7168,0,0,1309.3769,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1033.25,281.49,0.0
256,20480,7168,2304,0,0,670.7196,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1008.55,532.71,0.0
256,20480,512,7168,2,0,193.5078,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.84,885.97,0.0
256,20480,4096,512,18,0,144.807,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,593.2,1245.49,0.0
80,16,1536,7168,8,0,23.9251,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,14.73,467.04,0.0
80,16,3072,1536,8,0,8.4118,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,17.95,575.56,0.0
80,16,576,7168,8,0,23.4551,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,5.63,181.7,0.0
Expand Down
39 changes: 0 additions & 39 deletions aiter/configs/a8w8_blockscale_untuned_gemm.csv
Original file line number Diff line number Diff line change
@@ -1,118 +1,79 @@
M,N,K
16, 1536, 7168
16, 3072, 1536
16, 576, 7168
16, 7168, 256
16, 7168, 2048
16, 4608, 7168
16, 7168, 2304
16, 512, 7168
16, 4096, 512
32, 1536, 7168
32, 3072, 1536
32, 576, 7168
32, 7168, 256
32, 7168, 2048
32, 4608, 7168
32, 7168, 2304
32, 512, 7168
32, 4096, 512
64, 1536, 7168
64, 3072, 1536
64, 576, 7168
64, 7168, 256
64, 7168, 2048
64, 4608, 7168
64, 7168, 2304
64, 512, 7168
64, 4096, 512
128, 1536, 7168
128, 3072, 1536
128, 576, 7168
128, 7168, 256
128, 7168, 2048
128, 4608, 7168
128, 7168, 2304
128, 512, 7168
128, 4096, 512
256, 1536, 7168
256, 3072, 1536
256, 576, 7168
256, 7168, 256
256, 7168, 2048
256, 4608, 7168
256, 7168, 2304
256, 512, 7168
256, 4096, 512
512, 1536, 7168
512, 3072, 1536
512, 576, 7168
512, 7168, 256
512, 7168, 2048
512, 4608, 7168
512, 7168, 2304
512, 512, 7168
512, 4096, 512
1024, 1536, 7168
1024, 3072, 1536
1024, 576, 7168
1024, 7168, 256
1024, 7168, 2048
1024, 4608, 7168
1024, 7168, 2304
1024, 512, 7168
1024, 4096, 512
1536, 1536, 7168
1536, 3072, 1536
1536, 576, 7168
1536, 7168, 256
1536, 7168, 2048
1536, 4608, 7168
1536, 7168, 2304
1536, 512, 7168
1536, 4096, 512
2048, 1536, 7168
2048, 3072, 1536
2048, 576, 7168
2048, 7168, 256
2048, 7168, 2048
2048, 4608, 7168
2048, 7168, 2304
2048, 512, 7168
2048, 4096, 512
4096, 1536, 7168
4096, 3072, 1536
4096, 576, 7168
4096, 7168, 256
4096, 7168, 2048
4096, 4608, 7168
4096, 7168, 2304
4096, 512, 7168
4096, 4096, 512
8192, 1536, 7168
8192, 3072, 1536
8192, 576, 7168
8192, 7168, 256
8192, 7168, 2048
8192, 4608, 7168
8192, 7168, 2304
8192, 512, 7168
8192, 4096, 512
16384, 1536, 7168
16384, 3072, 1536
16384, 576, 7168
16384, 7168, 256
16384, 7168, 2048
16384, 4608, 7168
16384, 7168, 2304
16384, 512, 7168
16384, 4096, 512
20480, 1536, 7168
20480, 3072, 1536
20480, 576, 7168
20480, 7168, 256
20480, 7168, 2048
20480, 4608, 7168
20480, 7168, 2304
20480, 512, 7168
20480, 4096, 512
Loading