diff --git a/aiter/configs/a8w8_blockscale_tuned_gemm.csv b/aiter/configs/a8w8_blockscale_tuned_gemm.csv index d3146fc1df..12df78b9ca 100644 --- a/aiter/configs/a8w8_blockscale_tuned_gemm.csv +++ b/aiter/configs/a8w8_blockscale_tuned_gemm.csv @@ -1,6 +1,6153 @@ gfx,cu_num,M,N,K,libtype,kernelId,splitK,us,kernelName,tflops,bw,errRatio -gfx950,256,8192,512,7168,ck,0,0,64.1614,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,937.16,1103.14,0.0 -gfx950,256,16384,512,7168,cktile,11,0,98.713,a8w8_blockscale_cktile_192x256x128_4x2x1_16x16x128_intrawave_0x1x0_1,1218.27,1396.85,0.0 -gfx950,256,20480,512,7168,cktile,27,0,95.1492,a8w8_blockscale_cktile_192x256x128_4x2x1_16x16x128_intrawave_0x1x0_3,1579.88,1801.82,0.0 -gfx950,256,128,1024,4096,ck,8,0,13.7599,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,78.03,361.97,0.0 -gfx950,256,128,4096,1280,ck,7,0,7.4194,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,180.9,870.06,0.0 +gfx950,256,7,8192,512,ck,13,0,3.7336,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,15.73,1155.07,0.0 +gfx950,256,3,8192,512,ck,13,0,3.7574,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,6.7,1129.77,0.0 +gfx950,256,6,8192,512,ck,13,0,3.7671,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,13.36,1140.31,0.0 +gfx950,256,10,8192,512,ck,13,0,3.8138,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,22.0,1144.07,0.0 +gfx950,256,11,8192,512,ck,8,0,3.9126,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,23.58,1119.5,0.0 +gfx950,256,15,8192,512,ck,8,0,3.9158,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,32.13,1135.85,0.0 +gfx950,256,23,8192,512,ck,13,0,3.9406,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,48.96,1163.0,0.0 +gfx950,256,24,8192,512,ck,13,0,3.9606,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,50.83,1161.39,0.0 +gfx950,256,26,8192,512,ck,13,0,3.9638,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,55.02,1168.98,0.0 +gfx950,256,27,8192,512,ck,13,0,3.9792,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,56.92,1168.7,0.0 +gfx950,256,28,8192,512,ck,13,0,3.9822,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,58.98,1172.06,0.0 +gfx950,256,17,8192,512,ck,8,0,4.0994,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,34.79,1093.22,0.0 +gfx950,256,30,8192,512,ck,8,0,4.1143,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,61.17,1142.64,0.0 +gfx950,256,34,8192,512,ck,13,0,4.1582,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,68.59,1146.83,0.0 +gfx950,256,33,8192,512,ck,13,0,4.1722,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,66.35,1138.94,0.0 +gfx950,256,38,8192,512,ck,13,0,4.1805,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,76.25,1156.88,0.0 +gfx950,256,37,8192,512,ck,13,0,4.1874,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,74.12,1150.94,0.0 +gfx950,256,45,8192,512,ck,13,0,4.2159,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,89.54,1175.22,0.0 +gfx950,256,35,8192,512,ck,13,0,4.2281,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,69.44,1131.87,0.0 +gfx950,256,41,8192,512,ck,13,0,4.2358,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,81.2,1153.75,0.0 +gfx950,256,42,8192,512,ck,13,0,4.237,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,83.15,1157.41,0.0 +gfx950,256,57,8192,512,ck,13,0,4.2966,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,111.29,1200.34,0.0 +gfx950,256,63,8192,512,ck,13,0,4.3334,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,121.96,1213.54,0.0 +gfx950,256,20,8192,512,ck,7,0,4.4006,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,38.12,1029.91,0.0 +gfx950,256,21,8192,512,ck,7,0,4.4128,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,39.92,1030.89,0.0 +gfx950,256,39,8192,512,ck,7,0,4.4339,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,73.79,1094.58,0.0 +gfx950,256,43,8192,512,ck,7,0,4.459,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,80.89,1103.57,0.0 +gfx950,256,44,8192,512,ck,7,0,4.4775,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,82.43,1102.79,0.0 +gfx950,256,58,8192,512,ck,7,0,4.4927,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,108.3,1151.71,0.0 +gfx950,256,50,8192,512,ck,8,0,4.4964,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,93.28,1120.7,0.0 +gfx950,256,61,8192,512,ck,7,0,4.515,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,113.33,1157.24,0.0 +gfx950,256,60,8192,512,ck,8,0,4.5647,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,110.26,1140.94,0.0 +gfx950,256,67,8192,512,ck,13,0,4.9762,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,112.94,1070.36,0.0 +gfx950,256,68,8192,512,ck,13,0,4.9794,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,114.56,1073.07,0.0 +gfx950,256,69,8192,512,ck,13,0,4.9987,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,115.79,1072.3,0.0 +gfx950,256,75,8192,512,ck,13,0,5.0003,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,125.82,1092.24,0.0 +gfx950,256,71,8192,512,ck,13,0,5.0032,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,119.04,1078.09,0.0 +gfx950,256,74,8192,512,ck,13,0,5.0043,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,124.04,1087.99,0.0 +gfx950,256,70,8192,512,ck,13,0,5.0118,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,117.16,1072.87,0.0 +gfx950,256,79,8192,512,ck,13,0,5.035,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,131.62,1098.13,0.0 +gfx950,256,76,8192,512,ck,13,0,5.0354,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,126.61,1087.98,0.0 +gfx950,256,72,8192,512,ck,13,0,5.0394,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,119.85,1073.7,0.0 +gfx950,256,85,8192,512,ck,13,0,5.0447,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,141.34,1116.11,0.0 +gfx950,256,82,8192,512,ck,13,0,5.0454,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,136.34,1105.91,0.0 +gfx950,256,84,8192,512,ck,13,0,5.0606,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,139.24,1109.27,0.0 +gfx950,256,83,8192,512,ck,12,0,5.0847,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,136.93,1100.69,0.0 +gfx950,256,73,8192,512,ck,12,0,5.0863,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,120.4,1067.12,0.0 +gfx950,256,86,8192,512,ck,13,0,5.0876,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,141.8,1110.02,0.0 +gfx950,256,87,8192,512,ck,13,0,5.0909,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,143.36,1112.62,0.0 +gfx950,256,81,8192,512,ck,18,0,5.1114,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,132.93,1088.33,0.0 +gfx950,256,104,8192,512,ck,13,0,5.1203,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,170.38,1162.33,0.0 +gfx950,256,94,8192,512,ck,13,0,5.1238,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,153.9,1128.56,0.0 +gfx950,256,108,8192,512,ck,13,0,5.1376,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,176.34,1171.57,0.0 +gfx950,256,90,8192,512,ck,12,0,5.1401,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,146.88,1111.84,0.0 +gfx950,256,106,8192,512,ck,13,0,5.1406,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,172.97,1164.32,0.0 +gfx950,256,109,8192,512,ck,13,0,5.1535,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,177.42,1171.24,0.0 +gfx950,256,103,8192,512,ck,13,0,5.1578,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,167.52,1150.61,0.0 +gfx950,256,102,8192,512,ck,18,0,5.1751,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,165.34,1143.49,0.0 +gfx950,256,113,8192,512,ck,13,0,5.1799,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,183.0,1178.31,0.0 +gfx950,256,107,8192,512,ck,13,0,5.1871,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,173.04,1157.14,0.0 +gfx950,256,117,8192,512,ck,13,0,5.1929,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,189.0,1188.38,0.0 +gfx950,256,124,8192,512,ck,13,0,5.2387,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,198.56,1200.57,0.0 +gfx950,256,120,8192,512,ck,13,0,5.239,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,192.14,1187.6,0.0 +gfx950,256,118,8192,512,ck,12,0,5.2478,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,188.62,1179.17,0.0 +gfx950,256,121,8192,512,ck,13,0,5.2517,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,193.27,1187.94,0.0 +gfx950,256,125,8192,512,ck,13,0,5.2621,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,199.27,1198.44,0.0 +gfx950,256,127,8192,512,ck,13,0,5.2923,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,201.3,1197.98,0.0 +gfx950,256,123,8192,512,ck,12,0,5.2936,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,194.91,1184.92,0.0 +gfx950,256,133,8192,512,ck,13,0,6.1567,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,181.21,1046.25,0.0 +gfx950,256,151,8192,512,ck,13,0,6.1791,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,204.99,1091.68,0.0 +gfx950,256,131,8192,512,ck,13,0,6.1819,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,177.76,1036.52,0.0 +gfx950,256,153,8192,512,ck,13,0,6.1991,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,207.04,1093.61,0.0 +gfx950,256,132,8192,512,ck,13,0,6.2003,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,178.59,1036.17,0.0 +gfx950,256,40,128,7168,ck,6,3,6.2114,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,11.82,195.52,0.0004 +gfx950,256,129,8192,512,ck,13,0,6.2185,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,174.02,1024.99,0.0 +gfx950,256,158,8192,512,ck,13,0,6.2551,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,211.89,1097.32,0.0 +gfx950,256,137,8192,512,ck,13,0,6.2551,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,183.73,1040.6,0.0 +gfx950,256,159,8192,512,ck,13,0,6.2711,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,212.69,1097.22,0.0 +gfx950,256,162,8192,512,ck,13,0,6.273,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,216.64,1104.97,0.0 +gfx950,256,166,8192,512,ck,13,0,6.279,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,221.77,1114.67,0.0 +gfx950,256,136,8192,512,ck,18,0,6.2807,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,181.64,1033.67,0.0 +gfx950,256,163,8192,512,ck,13,0,6.2831,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,217.62,1105.88,0.0 +gfx950,256,168,8192,512,ck,13,0,6.2881,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,224.12,1118.44,0.0 +gfx950,256,176,8192,512,ck,13,0,6.3111,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,233.94,1135.78,0.0 +gfx950,256,165,8192,512,ck,13,0,6.3139,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,219.22,1105.84,0.0 +gfx950,256,149,8192,512,ck,18,0,6.3139,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,197.96,1063.02,0.0 +gfx950,256,169,8192,512,ck,13,0,6.3146,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,224.51,1116.42,0.0 +gfx950,256,184,8192,512,ck,13,0,6.3255,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,244.01,1154.56,0.0 +gfx950,256,174,8192,512,ck,13,0,6.3308,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,230.56,1126.9,0.0 +gfx950,256,172,8192,512,ck,13,0,6.3368,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,227.69,1120.5,0.0 +gfx950,256,142,8192,512,ck,18,0,6.3427,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,187.8,1039.55,0.0 +gfx950,256,182,8192,512,ck,13,0,6.357,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,240.16,1143.52,0.0 +gfx950,256,1,6144,1536,ck,8,0,6.3579,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,2.97,1486.5,0.0 +gfx950,256,178,8192,512,ck,13,0,6.3615,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,234.72,1132.09,0.0 +gfx950,256,181,8192,512,ck,13,0,6.3707,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,238.33,1138.41,0.0 +gfx950,256,175,8192,512,ck,13,0,6.3794,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,230.12,1120.97,0.0 +gfx950,256,185,8192,512,ck,13,0,6.387,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,242.98,1146.09,0.0 +gfx950,256,4,6144,1536,ck,8,0,6.3927,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,11.81,1484.89,0.0 +gfx950,256,2,6144,1536,ck,8,0,6.4194,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,5.88,1474.41,0.0 +gfx950,256,161,8192,512,ck,18,0,6.4235,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,210.25,1076.45,0.0 +gfx950,256,16,6144,1536,ck,8,0,6.4237,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,47.01,1503.55,0.0 +gfx950,256,190,8192,512,ck,13,0,6.4479,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,247.19,1148.37,0.0 +gfx950,256,191,8192,512,ck,13,0,6.471,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,247.6,1146.88,0.0 +gfx950,256,196,8192,512,ck,18,0,6.4828,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,253.62,1157.82,0.0 +gfx950,256,24,8192,1536,ck,8,0,6.4842,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,93.15,2006.88,0.0 +gfx950,256,200,8192,512,ck,18,0,6.4987,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,258.16,1165.39,0.0 +gfx950,256,208,8192,512,ck,18,0,6.5097,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,268.04,1184.18,0.0 +gfx950,256,202,8192,512,ck,18,0,6.5111,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,260.25,1168.36,0.0 +gfx950,256,205,8192,512,ck,18,0,6.5115,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,264.1,1176.07,0.0 +gfx950,256,215,8192,512,ck,18,0,6.5211,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,276.57,1200.25,0.0 +gfx950,256,217,8192,512,ck,18,0,6.5352,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,278.54,1202.83,0.0 +gfx950,256,183,8192,512,ck,18,0,6.5423,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,234.64,1113.72,0.0 +gfx950,256,211,8192,512,ck,18,0,6.5425,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,270.54,1185.99,0.0 +gfx950,256,219,8192,512,ck,18,0,6.5655,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,279.81,1202.43,0.0 +gfx950,256,210,8192,512,ck,18,0,6.5732,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,268.0,1177.88,0.0 +gfx950,256,214,8192,512,ck,18,0,6.5793,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,272.85,1187.06,0.0 +gfx950,256,32,6144,1536,ck,8,0,6.5918,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,91.63,1498.76,0.0 +gfx950,256,225,8192,512,ck,18,0,6.6027,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,285.86,1211.01,0.0 +gfx950,256,212,8192,512,ck,18,0,6.6031,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,269.33,1177.67,0.0 +gfx950,256,232,8192,512,ck,18,0,6.6191,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,294.02,1225.87,0.0 +gfx950,256,230,8192,512,ck,18,0,6.6331,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,290.87,1218.19,0.0 +gfx950,256,222,8192,512,ck,18,0,6.637,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,280.59,1197.11,0.0 +gfx950,256,24,6144,1536,ck,8,0,6.6383,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,68.24,1471.61,0.0 +gfx950,256,198,8192,512,ck,17,0,6.6579,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,249.47,1132.45,0.0 +gfx950,256,235,8192,512,ck,18,0,6.6738,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,295.38,1223.42,0.0 +gfx950,256,207,8192,512,ck,17,0,6.699,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,259.21,1148.2,0.0 +gfx950,256,206,8192,512,ck,17,0,6.7158,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,257.31,1142.81,0.0 +gfx950,256,243,8192,512,ck,17,0,6.7332,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,302.74,1232.7,0.0 +gfx950,256,255,8192,512,ck,18,0,6.7351,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,317.6,1262.46,0.0 +gfx950,256,24,128,7168,ck,8,2,6.7385,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,6.54,162.6,0.0 +gfx950,256,247,8192,512,ck,17,0,6.7574,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,306.62,1238.29,0.0 +gfx950,256,48,128,7168,ck,3,3,6.8196,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,12.92,186.79,0.001 +gfx950,256,128,4096,1280,ck,8,0,6.897,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,194.6,935.96,0.0 +gfx950,256,8,6144,1536,ck,8,0,7.0062,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,21.55,1362.76,0.0 +gfx950,256,226,8192,512,ck,16,0,7.0869,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,267.51,1130.65,0.0 +gfx950,256,233,8192,512,ck,2,0,7.2197,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_1x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,270.72,1126.23,0.0 +gfx950,256,40,6144,1536,ck,18,0,7.2556,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,104.05,1376.89,0.0 +gfx950,256,48,6144,1536,ck,18,0,7.2677,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,124.66,1389.81,0.0 +gfx950,256,250,8192,512,ck,3,0,7.2763,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,288.22,1156.95,0.0 +gfx950,256,40,8192,1536,ck,18,0,7.3147,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,137.62,1818.22,0.0 +gfx950,256,56,6144,1536,ck,18,0,7.3214,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,144.37,1394.72,0.0 +gfx950,256,48,8192,1536,ck,7,0,7.3799,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,163.68,1821.58,0.0 +gfx950,256,232,128,7168,ck,8,2,7.3851,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,57.65,357.46,0.0 +gfx950,256,56,8192,1536,ck,18,0,7.4346,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,189.56,1827.46,0.0 +gfx950,256,88,6144,1536,ck,18,0,7.48,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,222.05,1424.29,0.0 +gfx950,256,64,6144,1536,ck,8,0,7.5132,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,160.78,1373.84,0.0 +gfx950,256,104,6144,1536,ck,18,0,7.5222,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,260.95,1445.7,0.0 +gfx950,256,112,6144,1536,ck,18,0,7.5393,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,280.39,1457.09,0.0 +gfx950,256,80,6144,1536,ck,18,0,7.5795,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,199.21,1391.0,0.0 +gfx950,256,72,6144,1536,ck,7,0,7.5797,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,179.29,1376.38,0.0 +gfx950,256,96,6144,1536,ck,18,0,7.5802,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,239.04,1420.05,0.0 +gfx950,256,128,6144,1536,ck,18,0,7.5858,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,318.48,1477.32,0.0 +gfx950,256,120,6144,1536,ck,18,0,7.6051,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,297.82,1459.03,0.0 +gfx950,256,260,8192,512,ck,3,0,7.6072,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,286.71,1128.83,0.0 +gfx950,256,286,8192,512,ck,13,0,7.608,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,315.34,1186.46,0.0 +gfx950,256,88,8192,1536,ck,18,0,7.6226,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,290.53,1857.62,0.0 +gfx950,256,72,128,7168,ck,8,2,7.6325,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,17.31,190.24,0.0001 +gfx950,256,258,8192,512,ck,18,0,7.6359,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,283.43,1120.17,0.0 +gfx950,256,281,8192,512,ck,13,0,7.6391,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,308.57,1170.57,0.0 +gfx950,256,80,8192,1536,ck,18,0,7.6417,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,263.46,1834.21,0.0 +gfx950,256,268,8192,512,ck,18,0,7.6424,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,294.17,1141.32,0.0 +gfx950,256,310,8192,512,ck,16,0,7.6447,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,340.17,1233.8,0.0 +gfx950,256,264,8192,512,ck,3,0,7.6633,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,288.99,1129.39,0.0 +gfx950,256,72,8192,1536,ck,18,0,7.6639,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,236.43,1810.19,0.0 +gfx950,256,280,8192,512,ck,18,0,7.6651,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,306.43,1164.39,0.0 +gfx950,256,56,128,7168,ck,16,3,7.6673,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,13.4,173.89,0.0 +gfx950,256,168,128,7168,ck,6,3,7.6813,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,40.13,281.82,0.0013 +gfx950,256,270,8192,512,ck,18,0,7.6819,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,294.84,1139.85,0.0 +gfx950,256,112,8192,1536,ck,18,0,7.6911,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,366.47,1896.99,0.0 +gfx950,256,262,8192,512,ck,13,0,7.6923,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,285.72,1120.74,0.0 +gfx950,256,265,8192,512,ck,13,0,7.7048,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,288.52,1125.5,0.0 +gfx950,256,285,8192,512,ck,13,0,7.7186,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,309.74,1167.27,0.0 +gfx950,256,275,8192,512,ck,18,0,7.7304,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,298.41,1143.63,0.0 +gfx950,256,80,128,7168,ck,6,3,7.7437,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,18.96,195.18,0.0013 +gfx950,256,144,6144,1536,ck,12,0,7.7459,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,350.88,1475.34,0.0 +gfx950,256,300,8192,512,ck,13,0,7.7483,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,324.79,1195.5,0.0 +gfx950,256,263,8192,512,ck,12,0,7.7517,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,284.61,1114.33,0.0 +gfx950,256,257,8192,512,ck,16,0,7.7533,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,278.06,1101.02,0.0 +gfx950,256,299,8192,512,ck,13,0,7.7546,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,323.45,1192.35,0.0 +gfx950,256,309,8192,512,ck,16,0,7.7704,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,333.58,1211.67,0.0 +gfx950,256,120,8192,1536,ck,18,0,7.7752,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,388.4,1894.91,0.0 +gfx950,256,276,8192,512,ck,16,0,7.7799,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,297.59,1138.52,0.0 +gfx950,256,316,8192,512,ck,16,0,7.7847,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,340.51,1224.64,0.0 +gfx950,256,289,8192,512,ck,16,0,7.7936,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,311.06,1164.71,0.0 +gfx950,256,303,8192,512,ck,13,0,7.8135,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,325.3,1192.01,0.0 +gfx950,256,272,8192,512,ck,18,0,7.8151,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,291.96,1124.75,0.0 +gfx950,256,261,8192,512,ck,13,0,7.8185,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,280.03,1100.49,0.0 +gfx950,256,298,8192,512,ck,16,0,7.8466,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,318.58,1176.22,0.0 +gfx950,256,307,8192,512,ck,12,0,7.8473,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,328.18,1195.49,0.0 +gfx950,256,325,8192,512,ck,3,0,7.863,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,346.72,1231.78,0.0 +gfx950,256,326,8192,512,ck,3,0,7.8701,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,347.48,1232.82,0.0 +gfx950,256,314,8192,512,ck,13,0,7.8799,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,334.27,1205.55,0.0 +gfx950,256,292,8192,512,ck,13,0,7.8812,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,310.8,1158.19,0.0 +gfx950,256,290,8192,512,ck,18,0,7.8841,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,308.56,1153.48,0.0 +gfx950,256,318,8192,512,ck,12,0,7.8905,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,338.07,1212.5,0.0 +gfx950,256,294,8192,512,ck,13,0,7.9186,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,311.45,1156.99,0.0 +gfx950,256,332,8192,512,ck,18,0,7.9203,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,351.63,1237.8,0.0 +gfx950,256,324,8192,512,ck,16,0,7.9276,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,342.84,1219.61,0.0 +gfx950,256,176,128,7168,ck,6,3,7.9293,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,40.73,280.49,0.0012 +gfx950,256,335,8192,512,ck,18,0,7.9293,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,354.41,1242.79,0.0 +gfx950,256,333,8192,512,ck,16,0,7.9434,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,351.66,1236.33,0.0 +gfx950,256,315,8192,512,ck,16,0,7.9463,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,332.53,1197.61,0.0 +gfx950,256,319,8192,512,ck,13,0,7.9524,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,336.5,1205.19,0.0 +gfx950,256,152,6144,1536,ck,12,0,7.9614,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,360.35,1449.3,0.0 +gfx950,256,283,8192,512,ck,18,0,7.9623,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,298.15,1127.3,0.0 +gfx950,256,304,8192,512,ck,13,0,7.9631,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,320.24,1171.74,0.0 +gfx950,256,353,8192,512,ck,18,0,7.9679,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,371.64,1274.94,0.0 +gfx950,256,104,8192,1536,ck,12,0,7.9777,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,328.07,1810.87,0.0 +gfx950,256,358,8192,512,ck,16,0,7.9808,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,376.29,1283.46,0.0 +gfx950,256,321,8192,512,ck,16,0,7.9922,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,336.92,1203.41,0.0 +gfx950,256,363,8192,512,ck,18,0,7.9979,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,380.73,1291.28,0.0 +gfx950,256,361,8192,512,ck,18,0,8.0257,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,377.32,1282.6,0.0 +gfx950,256,136,6144,1536,ck,12,0,8.0305,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,319.65,1409.28,0.0 +gfx950,256,344,8192,512,ck,18,0,8.0311,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,359.31,1245.97,0.0 +gfx950,256,373,8192,512,ck,18,0,8.0383,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,389.26,1305.81,0.0 +gfx950,256,372,8192,512,ck,18,0,8.0419,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,388.04,1303.13,0.0 +gfx950,256,343,8192,512,ck,16,0,8.0564,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,357.14,1239.96,0.0 +gfx950,256,342,8192,512,ck,18,0,8.0663,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,355.67,1236.35,0.0 +gfx950,256,374,8192,512,ck,3,0,8.0799,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,388.29,1301.18,0.0 +gfx950,256,376,8192,512,ck,18,0,8.0848,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,390.13,1304.57,0.0 +gfx950,256,370,8192,512,ck,18,0,8.0927,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,383.53,1290.77,0.0 +gfx950,256,120,128,7168,ck,5,3,8.0953,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,27.2,223.39,0.0006 +gfx950,256,349,8192,512,ck,18,0,8.1038,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,361.27,1245.22,0.0 +gfx950,256,378,8192,512,ck,16,0,8.105,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,391.23,1305.49,0.0 +gfx950,256,136,128,7168,ck,6,3,8.1497,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,30.62,236.47,0.0007 +gfx950,256,341,8192,512,ck,16,0,8.1525,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,350.88,1221.2,0.0 +gfx950,256,380,8192,512,ck,16,0,8.1656,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,390.38,1299.94,0.0 +gfx950,256,356,8192,512,ck,16,0,8.1732,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,365.38,1249.12,0.0 +gfx950,256,338,8192,512,ck,16,0,8.1834,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,346.48,1210.4,0.0 +gfx950,256,362,8192,512,ck,16,0,8.1835,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,371.07,1259.93,0.0 +gfx950,256,350,8192,512,ck,16,0,8.1897,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,358.5,1234.22,0.0 +gfx950,256,346,8192,512,ck,3,0,8.1911,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,354.34,1225.76,0.0 +gfx950,256,377,8192,512,ck,18,0,8.2015,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,385.6,1288.07,0.0 +gfx950,256,368,8192,512,ck,3,0,8.2619,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,373.64,1260.25,0.0 +gfx950,256,160,6144,1536,ck,12,0,8.3183,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,363.04,1400.41,0.0 +gfx950,256,88,128,7168,ck,5,3,8.3649,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,19.3,187.79,0.0008 +gfx950,256,382,8192,512,ck,3,0,8.3655,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_1x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,383.06,1272.92,0.0 +gfx950,256,144,128,7168,ck,8,2,8.5353,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,30.96,232.75,0.0 +gfx950,256,184,128,7168,ck,6,3,8.5701,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,39.4,266.45,0.0008 +gfx950,256,208,128,7168,ck,8,2,8.5893,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,44.44,286.6,0.0 +gfx950,256,112,128,7168,ck,11,3,8.6373,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_2x1_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,23.79,202.49,0.0008 +gfx950,256,152,128,7168,ck,8,2,8.6946,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,32.08,235.31,0.0 +gfx950,256,200,128,7168,ck,6,3,8.7508,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,41.94,274.52,0.0012 +gfx950,256,104,128,7168,ck,18,2,8.8869,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,21.47,190.12,0.0 +gfx950,256,216,128,7168,ck,8,2,9.2441,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,42.88,272.72,0.0 +gfx950,256,469,8192,512,ck,0,0,9.2559,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,425.05,1309.28,0.0 +gfx950,256,482,8192,512,ck,0,0,9.3195,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,433.85,1323.91,0.0 +gfx950,256,492,8192,512,ck,18,0,9.349,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,441.46,1337.8,0.0 +gfx950,256,472,8192,512,ck,18,0,9.3591,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,423.06,1300.25,0.0 +gfx950,256,451,8192,512,ck,18,0,9.4131,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,401.91,1255.1,0.0 +gfx950,256,475,8192,512,ck,0,0,9.4183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,423.07,1297.46,0.0 +gfx950,256,470,8192,512,ck,18,0,9.4674,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,416.44,1281.81,0.0 +gfx950,256,478,8192,512,ck,17,0,9.4995,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,422.1,1291.71,0.0 +gfx950,256,476,8192,512,ck,18,0,9.5078,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,419.97,1287.03,0.0 +gfx950,256,450,8192,512,ck,17,0,9.548,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,395.36,1235.6,0.0 +gfx950,256,304,128,7168,ck,6,3,9.5569,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,58.37,332.16,0.0002 +gfx950,256,467,8192,512,ck,0,0,9.5623,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,409.68,1263.79,0.0 +gfx950,256,391,8192,512,ck,0,0,9.5811,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,342.33,1127.29,0.0 +gfx950,256,459,8192,512,ck,0,0,9.6055,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,400.85,1244.03,0.0 +gfx950,256,494,8192,512,ck,18,0,9.6088,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,431.27,1305.15,0.0 +gfx950,256,488,8192,512,ck,0,0,9.6214,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,425.47,1292.9,0.0 +gfx950,256,486,8192,512,ck,0,0,9.6278,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,423.45,1288.54,0.0 +gfx950,256,464,8192,512,ck,18,0,9.6347,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,403.99,1249.03,0.0 +gfx950,256,240,128,7168,ck,6,3,9.6395,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,45.69,280.02,0.0012 +gfx950,256,489,8192,512,ck,0,0,9.6655,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,424.4,1288.75,0.0 +gfx950,256,428,8192,512,ck,0,0,9.6683,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,371.35,1181.78,0.0 +gfx950,256,415,8192,512,ck,0,0,9.6959,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,359.05,1155.76,0.0 +gfx950,256,456,8192,512,ck,17,0,9.7619,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,391.85,1218.91,0.0 +gfx950,256,458,8192,512,ck,2,0,9.7702,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_1x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,393.23,1221.33,0.0 +gfx950,256,386,8192,512,ck,18,0,9.7727,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,331.33,1096.54,0.0 +gfx950,256,402,8192,512,ck,0,0,9.8027,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,344.01,1120.76,0.0 +gfx950,256,421,8192,512,ck,0,0,9.8183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,359.7,1151.68,0.0 +gfx950,256,465,8192,512,ck,0,0,9.8411,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,396.37,1224.55,0.0 +gfx950,256,468,8192,512,ck,18,0,9.8435,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,398.83,1229.4,0.0 +gfx950,256,504,8192,512,ck,17,0,9.8779,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,428.01,1286.7,0.0 +gfx950,256,437,8192,512,ck,18,0,9.8794,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,371.06,1171.92,0.0 +gfx950,256,443,8192,512,ck,0,0,9.8983,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,375.43,1179.92,0.0 +gfx950,256,396,8192,512,ck,0,0,9.905,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,335.37,1098.95,0.0 +gfx950,256,502,8192,512,ck,0,0,9.9571,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,422.92,1273.07,0.0 +gfx950,256,422,8192,512,ck,0,0,9.9779,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,354.78,1134.95,0.0 +gfx950,256,407,8192,512,ck,18,0,9.9819,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,342.04,1109.11,0.0 +gfx950,256,442,8192,512,ck,0,0,9.9895,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,371.17,1167.46,0.0 +gfx950,256,474,8192,512,ck,17,0,9.9979,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,397.7,1220.56,0.0 +gfx950,256,430,8192,512,ck,0,0,10.0015,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,360.66,1145.79,0.0 +gfx950,256,510,8192,512,ck,2,0,10.0262,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_1x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,426.7,1277.78,0.0 +gfx950,256,403,8192,512,ck,18,0,10.0427,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,336.62,1095.66,0.0 +gfx950,256,479,8192,512,ck,17,0,10.0475,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,399.91,1222.94,0.0 +gfx950,256,397,8192,512,ck,18,0,10.0487,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,331.41,1084.92,0.0 +gfx950,256,392,8192,512,ck,0,0,10.0523,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,327.12,1076.13,0.0 +gfx950,256,393,8192,512,ck,18,0,10.0527,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,327.94,1077.76,0.0 +gfx950,256,425,8192,512,ck,0,0,10.0635,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,354.27,1130.33,0.0 +gfx950,256,423,8192,512,ck,0,0,10.0671,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,352.47,1126.57,0.0 +gfx950,256,388,8192,512,ck,0,0,10.1123,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,321.86,1063.06,0.0 +gfx950,256,433,8192,512,ck,0,0,10.1186,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,358.97,1137.54,0.0 +gfx950,256,400,128,7168,ck,8,2,10.1266,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,72.48,383.85,0.0 +gfx950,256,420,8192,512,ck,18,0,10.1735,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,346.31,1109.81,0.0 +gfx950,256,464,128,7168,ck,6,3,10.1957,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,83.51,427.85,0.0012 +gfx950,256,507,8192,512,ck,17,0,10.1971,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,417.08,1251.39,0.0 +gfx950,256,455,8192,512,ck,18,0,10.2007,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,374.17,1164.82,0.0 +gfx950,256,434,8192,512,ck,0,0,10.2504,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,355.17,1124.56,0.0 +gfx950,256,493,8192,512,ck,0,0,10.2507,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,403.44,1221.77,0.0 +gfx950,256,471,8192,512,ck,0,0,10.2903,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,383.96,1180.95,0.0 +gfx950,256,445,8192,512,ck,0,0,10.2935,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,362.65,1137.9,0.0 +gfx950,256,440,8192,512,ck,0,0,10.3095,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,358.02,1127.94,0.0 +gfx950,256,505,8192,512,ck,15,0,10.3135,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_2x1_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,410.75,1233.99,0.0 +gfx950,256,481,8192,512,ck,0,0,10.3455,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,390.02,1190.98,0.0 +gfx950,256,419,8192,512,ck,2,0,10.3635,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_1x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,339.15,1087.83,0.0 +gfx950,256,426,8192,512,ck,0,0,10.4231,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,342.85,1092.96,0.0 +gfx950,256,368,128,7168,ck,6,3,10.5301,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,64.13,346.58,0.0013 +gfx950,256,436,8192,512,ck,0,0,10.5557,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,346.49,1095.23,0.0 +gfx950,256,435,8192,512,ck,18,0,10.6019,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,344.19,1088.87,0.0 +gfx950,256,248,128,7168,ck,7,2,10.6871,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,42.58,258.13,0.0 +gfx950,256,409,8192,512,ck,13,0,10.7499,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,319.16,1033.01,0.0 +gfx950,256,128,1024,4096,ck,8,1,10.7865,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,99.54,461.76,0.0 +gfx950,256,336,128,7168,ck,5,3,10.8441,a8w8_blockscale_1x128x128_256x16x128x128_8x16_16x16_1x2_16x16x1_8x32x1_1x16x1x16_8_1x2_intrawave_v1,56.86,314.64,0.0005 +gfx950,256,224,6144,1536,ck,18,0,10.8515,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,389.61,1155.03,0.0 +gfx950,256,192,6144,1536,ck,18,0,10.9055,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,332.3,1108.74,0.0 +gfx950,256,830,128,7168,ck,8,2,10.9267,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,139.39,647.9,0.0 +gfx950,256,496,128,7168,ck,7,2,10.9409,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,83.19,420.42,0.0 +gfx950,256,256,6144,1536,ck,18,0,10.9719,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,440.38,1182.67,0.0 +gfx950,256,272,6144,1536,ck,18,0,10.9923,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,467.04,1200.6,0.0 +gfx950,256,200,6144,1536,ck,18,0,11.0035,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,343.06,1108.92,0.0 +gfx950,256,232,6144,1536,ck,18,0,11.0159,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,397.5,1147.83,0.0 +gfx950,256,216,8192,1536,ck,18,0,11.0511,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,491.88,1488.87,0.0 +gfx950,256,304,6144,1536,ck,18,0,11.0512,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,519.2,1234.23,0.0 +gfx950,256,208,6144,1536,ck,18,0,11.1301,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,352.73,1106.24,0.0 +gfx950,256,1001,128,7168,ck,8,2,11.1437,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,164.83,749.21,0.0 +gfx950,256,320,6144,1536,ck,18,0,11.1485,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,541.76,1243.29,0.0 +gfx950,256,272,128,7168,ck,8,2,11.1519,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,44.76,263.35,0.0 +gfx950,256,184,6144,1536,ck,13,0,11.1566,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,311.29,1073.88,0.0 +gfx950,256,176,6144,1536,ck,18,0,11.1569,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,297.74,1063.93,0.0 +gfx950,256,248,8192,1536,ck,18,0,11.1784,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,558.32,1523.21,0.0 +gfx950,256,240,6144,1536,ck,18,0,11.1829,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,405.07,1140.58,0.0 +gfx950,256,168,6144,1536,ck,18,0,11.1836,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,283.53,1051.51,0.0 +gfx950,256,569,8192,512,ck,18,0,11.1921,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,426.47,1233.74,0.0 +gfx950,256,200,8192,1536,ck,18,0,11.2017,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,449.32,1443.26,0.0 +gfx950,256,557,8192,512,ck,18,0,11.2373,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,415.8,1210.73,0.0 +gfx950,256,734,128,7168,ck,8,2,11.241,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,119.82,566.38,0.0 +gfx950,256,517,8192,512,ck,18,0,11.254,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,385.37,1148.88,0.0 +gfx950,256,535,8192,512,ck,13,0,11.2601,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,398.57,1175.27,0.0 +gfx950,256,571,8192,512,ck,18,0,11.261,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,425.35,1229.19,0.0 +gfx950,256,551,8192,512,ck,18,0,11.2683,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,410.19,1198.41,0.0 +gfx950,256,518,8192,512,ck,18,0,11.2744,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,385.41,1148.3,0.0 +gfx950,256,560,8192,512,ck,18,0,11.2748,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,416.65,1211.2,0.0 +gfx950,256,549,8192,512,ck,18,0,11.2818,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,408.21,1193.98,0.0 +gfx950,256,529,8192,512,ck,18,0,11.2846,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,393.24,1163.74,0.0 +gfx950,256,521,8192,512,ck,18,0,11.2861,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,387.24,1151.6,0.0 +gfx950,256,534,8192,512,ck,18,0,11.2945,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,396.61,1170.2,0.0 +gfx950,256,576,8192,512,ck,18,0,11.3038,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,427.45,1232.01,0.0 +gfx950,256,432,128,7168,ck,6,3,11.3094,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_1x1_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,70.09,364.71,0.0011 +gfx950,256,565,8192,512,ck,18,0,11.3144,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,418.9,1214.43,0.0 +gfx950,256,566,8192,512,ck,18,0,11.3187,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,419.48,1215.46,0.0 +gfx950,256,528,8192,512,ck,18,0,11.3191,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,391.3,1158.7,0.0 +gfx950,256,516,8192,512,ck,18,0,11.3292,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,382.07,1139.77,0.0 +gfx950,256,536,8192,512,ck,18,0,11.3302,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,396.84,1169.49,0.0 +gfx950,256,176,8192,1536,ck,18,0,11.331,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,390.89,1388.83,0.0 +gfx950,256,856,128,7168,ck,8,2,11.3334,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,138.6,641.68,0.0 +gfx950,256,248,6144,1536,ck,18,0,11.3456,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,412.57,1133.97,0.0 +gfx950,256,530,8192,512,ck,18,0,11.3528,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,391.62,1158.23,0.0 +gfx950,256,522,8192,512,ck,13,0,11.3612,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,385.42,1145.48,0.0 +gfx950,256,152,8192,1536,ck,18,0,11.3692,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,336.45,1346.34,0.0 +gfx950,256,568,8192,512,ck,18,0,11.374,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,418.91,1212.52,0.0 +gfx950,256,558,8192,512,ck,18,0,11.3816,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,411.26,1196.87,0.0 +gfx950,256,288,6144,1536,ck,18,0,11.3906,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,477.22,1178.03,0.0 +gfx950,256,144,8192,1536,ck,18,0,11.3974,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,317.96,1330.43,0.0 +gfx950,256,168,8192,1536,ck,18,0,11.4003,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,370.86,1367.81,0.0 +gfx950,256,216,6144,1536,ck,18,0,11.4012,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,357.58,1089.64,0.0 +gfx950,256,553,8192,512,ck,18,0,11.4013,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,406.87,1187.39,0.0 +gfx950,256,515,8192,512,ck,18,0,11.4189,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,378.33,1129.33,0.0 +gfx950,256,546,8192,512,ck,18,0,11.4241,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,400.92,1174.67,0.0 +gfx950,256,599,8192,512,ck,18,0,11.4468,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,438.97,1250.57,0.0 +gfx950,256,580,8192,512,ck,18,0,11.4593,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,424.58,1221.19,0.0 +gfx950,256,611,8192,512,ck,18,0,11.462,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,447.17,1266.6,0.0 +gfx950,256,208,8192,1536,ck,18,0,11.4666,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,456.5,1422.42,0.0 +gfx950,256,563,8192,512,ck,18,0,11.4695,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,411.77,1195.06,0.0 +gfx950,256,639,8192,512,ck,18,0,11.4723,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,467.24,1306.7,0.0 +gfx950,256,583,8192,512,ck,18,0,11.4782,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,426.07,1223.6,0.0 +gfx950,256,1007,128,7168,ck,7,2,11.4861,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,160.88,730.75,0.0 +gfx950,256,524,8192,512,ck,13,0,11.4862,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_2x1_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,382.69,1135.96,0.0 +gfx950,256,613,8192,512,ck,18,0,11.4875,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,447.64,1266.73,0.0 +gfx950,256,579,8192,512,ck,18,0,11.4879,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,422.79,1216.68,0.0 +gfx950,256,184,8192,1536,ck,18,0,11.4961,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,402.79,1381.35,0.0 +gfx950,256,636,8192,512,ck,18,0,11.4997,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,463.94,1299.18,0.0 +gfx950,256,581,8192,512,ck,18,0,11.5037,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,423.67,1217.95,0.0 +gfx950,256,626,8192,512,ck,18,0,11.5065,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,456.37,1283.73,0.0 +gfx950,256,594,8192,512,ck,18,0,11.5066,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,433.04,1236.73,0.0 +gfx950,256,232,8192,1536,ck,18,0,11.5075,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,507.36,1454.73,0.0 +gfx950,256,637,8192,512,ck,18,0,11.5124,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,464.16,1299.21,0.0 +gfx950,256,136,8192,1536,ck,18,0,11.5126,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,297.29,1304.66,0.0 +gfx950,256,621,8192,512,ck,18,0,11.5133,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,452.46,1275.63,0.0 +gfx950,256,629,8192,512,ck,18,0,11.5148,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,458.23,1287.2,0.0 +gfx950,256,525,8192,512,ck,18,0,11.5179,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,382.36,1134.3,0.0 +gfx950,256,596,8192,512,ck,18,0,11.52,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,433.99,1238.22,0.0 +gfx950,256,607,8192,512,ck,18,0,11.52,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,442.0,1254.36,0.0 +gfx950,256,586,8192,512,ck,18,0,11.5227,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,426.61,1223.27,0.0 +gfx950,256,608,8192,512,ck,18,0,11.5253,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,442.53,1255.24,0.0 +gfx950,256,605,8192,512,ck,18,0,11.5267,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,440.29,1250.69,0.0 +gfx950,256,618,8192,512,ck,18,0,11.5316,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,449.56,1269.21,0.0 +gfx950,256,592,8192,512,ck,18,0,11.5327,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,430.61,1231.0,0.0 +gfx950,256,624,8192,512,ck,18,0,11.5395,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,453.62,1277.13,0.0 +gfx950,256,600,8192,512,ck,18,0,11.5412,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,436.1,1241.8,0.0 +gfx950,256,593,8192,512,ck,18,0,11.5413,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,431.01,1231.55,0.0 +gfx950,256,623,8192,512,ck,18,0,11.5421,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,452.79,1275.38,0.0 +gfx950,256,612,8192,512,ck,18,0,11.5591,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,444.14,1257.42,0.0 +gfx950,256,617,8192,512,ck,18,0,11.5656,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,447.51,1264.02,0.0 +gfx950,256,627,8192,512,ck,18,0,11.5677,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,454.68,1278.4,0.0 +gfx950,256,622,8192,512,ck,18,0,11.5684,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,451.03,1271.02,0.0 +gfx950,256,240,8192,1536,ck,18,0,11.5772,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,521.7,1458.36,0.0 +gfx950,256,615,8192,512,ck,18,0,11.5772,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,445.62,1259.83,0.0 +gfx950,256,556,8192,512,ck,18,0,11.6033,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,401.96,1171.09,0.0 +gfx950,256,602,8192,512,ck,18,0,11.6211,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,434.55,1236.17,0.0 +gfx950,256,640,8192,512,ck,18,0,11.6315,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,461.57,1290.27,0.0 +gfx950,256,570,8192,512,ck,18,0,11.8307,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,404.16,1168.57,0.0 +gfx950,256,1015,128,7168,ck,8,2,11.8456,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,157.23,713.59,0.0 +gfx950,256,651,8192,512,ck,18,0,12.4808,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,437.55,1217.36,0.0 +gfx950,256,679,8192,512,ck,18,0,12.4824,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,456.31,1255.1,0.0 +gfx950,256,654,8192,512,ck,18,0,12.4829,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,439.49,1221.21,0.0 +gfx950,256,652,8192,512,ck,18,0,12.4923,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,437.82,1217.59,0.0 +gfx950,256,643,8192,512,ck,18,0,12.4991,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,431.54,1204.76,0.0 +gfx950,256,667,8192,512,ck,18,0,12.5004,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,447.6,1237.08,0.0 +gfx950,256,681,8192,512,ck,18,0,12.505,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,456.83,1255.54,0.0 +gfx950,256,655,8192,512,ck,18,0,12.5066,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,439.33,1220.25,0.0 +gfx950,256,672,8192,512,ck,18,0,12.5102,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,450.6,1242.86,0.0 +gfx950,256,668,8192,512,ck,18,0,12.5171,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,447.67,1236.77,0.0 +gfx950,256,703,8192,512,ck,18,0,12.5174,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,471.12,1283.99,0.0 +gfx950,256,649,8192,512,ck,18,0,12.5179,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,434.91,1211.05,0.0 +gfx950,256,646,8192,512,ck,18,0,12.5187,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,432.88,1206.92,0.0 +gfx950,256,676,8192,512,ck,18,0,12.5207,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,452.91,1247.21,0.0 +gfx950,256,657,8192,512,ck,18,0,12.5229,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,440.1,1221.36,0.0 +gfx950,256,665,8192,512,ck,18,0,12.5275,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,445.29,1231.7,0.0 +gfx950,256,663,8192,512,ck,18,0,12.5279,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,443.94,1228.97,0.0 +gfx950,256,682,8192,512,ck,18,0,12.5356,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,456.38,1253.82,0.0 +gfx950,256,683,8192,512,ck,18,0,12.5464,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,456.66,1254.09,0.0 +gfx950,256,673,8192,512,ck,18,0,12.5497,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,449.85,1240.29,0.0 +gfx950,256,659,8192,512,ck,18,0,12.555,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,440.31,1220.93,0.0 +gfx950,256,690,8192,512,ck,18,0,12.5566,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,460.96,1262.49,0.0 +gfx950,256,658,8192,512,ck,18,0,12.5571,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,439.57,1219.38,0.0 +gfx950,256,656,8192,512,ck,18,0,12.5581,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,438.2,1216.59,0.0 +gfx950,256,684,8192,512,ck,18,0,12.5722,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,456.39,1252.86,0.0 +gfx950,256,669,8192,512,ck,18,0,12.5814,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,446.05,1231.8,0.0 +gfx950,256,689,8192,512,ck,18,0,12.5826,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,459.34,1258.54,0.0 +gfx950,256,701,8192,512,ck,18,0,12.6038,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,466.56,1272.51,0.0 +gfx950,256,694,8192,512,ck,18,0,12.6126,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,461.58,1262.24,0.0 +gfx950,256,645,8192,512,ck,18,0,12.6185,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,428.79,1196.04,0.0 +gfx950,256,700,8192,512,ck,18,0,12.6197,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,465.31,1269.56,0.0 +gfx950,256,691,8192,512,ck,18,0,12.6445,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,458.42,1255.05,0.0 +gfx950,256,704,8192,512,ck,17,0,12.6635,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,466.35,1270.51,0.0 +gfx950,256,712,8192,512,ck,18,0,12.7659,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,467.86,1270.91,0.0 +gfx950,256,715,8192,512,ck,18,0,12.7719,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,469.61,1274.28,0.0 +gfx950,256,714,8192,512,ck,18,0,12.7817,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,468.6,1271.98,0.0 +gfx950,256,707,8192,512,ck,18,0,12.7981,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,463.41,1261.11,0.0 +gfx950,256,728,8192,512,ck,18,0,12.8032,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,476.98,1288.32,0.0 +gfx950,256,713,8192,512,ck,18,0,12.81,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,466.91,1267.85,0.0 +gfx950,256,670,8192,512,ck,18,0,12.8147,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,438.59,1210.69,0.0 +gfx950,256,717,8192,512,ck,18,0,12.8222,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,469.08,1271.91,0.0 +gfx950,256,724,8192,512,ck,18,0,12.8444,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,472.84,1278.92,0.0 +gfx950,256,732,8192,512,ck,18,0,12.8479,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,477.93,1289.1,0.0 +gfx950,256,710,8192,512,ck,18,0,12.849,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,463.53,1260.06,0.0 +gfx950,256,764,8192,512,ck,18,0,12.8513,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,498.7,1330.83,0.0 +gfx950,256,733,8192,512,ck,18,0,12.856,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,478.29,1289.6,0.0 +gfx950,256,726,8192,512,ck,18,0,12.856,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,473.72,1280.4,0.0 +gfx950,256,765,8192,512,ck,18,0,12.8621,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,498.93,1331.02,0.0 +gfx950,256,729,8192,512,ck,18,0,12.8659,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,475.31,1283.35,0.0 +gfx950,256,755,8192,512,ck,18,0,12.8717,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,492.04,1316.9,0.0 +gfx950,256,759,8192,512,ck,18,0,12.8726,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,494.61,1322.06,0.0 +gfx950,256,718,8192,512,ck,18,0,12.8727,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,467.89,1268.24,0.0 +gfx950,256,716,8192,512,ck,18,0,12.8856,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,466.12,1264.34,0.0 +gfx950,256,758,8192,512,ck,18,0,12.8872,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,493.4,1319.25,0.0 +gfx950,256,734,8192,512,ck,18,0,12.8873,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,477.78,1287.78,0.0 +gfx950,256,760,8192,512,ck,18,0,12.8913,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,494.55,1321.45,0.0 +gfx950,256,1009,128,7168,ck,7,2,12.8939,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,143.6,652.12,0.0 +gfx950,256,753,8192,512,ck,18,0,12.8942,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,489.88,1311.98,0.0 +gfx950,256,746,8192,512,ck,18,0,12.8945,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,485.32,1302.78,0.0 +gfx950,256,739,8192,512,ck,18,0,12.9076,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,480.27,1292.3,0.0 +gfx950,256,738,8192,512,ck,18,0,12.9095,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,479.55,1290.8,0.0 +gfx950,256,748,8192,512,ck,18,0,12.9175,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,485.75,1303.08,0.0 +gfx950,256,719,8192,512,ck,18,0,12.9273,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,466.56,1264.19,0.0 +gfx950,256,743,8192,512,ck,18,0,12.931,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,482.0,1295.18,0.0 +gfx950,256,1023,128,7168,ck,18,2,13.0101,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,144.29,654.28,0.0 +gfx950,256,352,6144,1536,ck,18,0,13.791,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,481.75,1037.14,0.0 +gfx950,256,814,8192,512,ck,18,0,13.8472,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,493.12,1296.12,0.0 +gfx950,256,24,7168,4096,ck,8,0,13.8633,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,101.66,2149.74,0.0 +gfx950,256,336,6144,1536,ck,18,0,13.8659,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,457.37,1015.59,0.0 +gfx950,256,783,8192,512,ck,18,0,13.8674,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,473.65,1256.46,0.0 +gfx950,256,811,8192,512,ck,18,0,13.8759,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,490.29,1289.79,0.0 +gfx950,256,784,8192,512,ck,18,0,13.8933,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,473.37,1255.34,0.0 +gfx950,256,816,8192,512,ck,18,0,13.8976,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,492.54,1293.85,0.0 +gfx950,256,797,8192,512,ck,18,0,13.9026,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,480.9,1270.3,0.0 +gfx950,256,795,8192,512,ck,18,0,13.9073,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,479.53,1267.44,0.0 +gfx950,256,805,8192,512,ck,18,0,13.9147,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,485.3,1278.91,0.0 +gfx950,256,801,8192,512,ck,18,0,13.9159,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,482.85,1273.94,0.0 +gfx950,256,775,8192,512,ck,18,0,13.916,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,467.17,1242.36,0.0 +gfx950,256,782,8192,512,ck,18,0,13.9209,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,471.23,1250.42,0.0 +gfx950,256,773,8192,512,ck,18,0,13.9216,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,465.78,1239.43,0.0 +gfx950,256,800,8192,512,ck,18,0,13.9297,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,481.77,1271.46,0.0 +gfx950,256,786,8192,512,ck,18,0,13.9298,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,473.33,1254.47,0.0 +gfx950,256,772,8192,512,ck,18,0,13.9319,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,464.83,1237.31,0.0 +gfx950,256,793,8192,512,ck,18,0,13.9381,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,477.26,1262.21,0.0 +gfx950,256,825,8192,512,ck,18,0,13.9439,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,496.32,1300.46,0.0 +gfx950,256,807,8192,512,ck,18,0,13.9461,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,485.41,1278.45,0.0 +gfx950,256,815,8192,512,ck,18,0,13.9591,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,489.77,1286.94,0.0 +gfx950,256,794,8192,512,ck,18,0,13.9601,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,477.11,1261.43,0.0 +gfx950,256,368,6144,1536,ck,18,0,13.963,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,497.44,1040.21,0.0 +gfx950,256,829,8192,512,ck,18,0,13.964,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,498.01,1303.43,0.0 +gfx950,256,824,8192,512,ck,18,0,13.9646,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,494.98,1297.32,0.0 +gfx950,256,792,8192,512,ck,18,0,13.9648,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,475.75,1258.59,0.0 +gfx950,256,777,8192,512,ck,18,0,13.9696,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,466.58,1240.01,0.0 +gfx950,256,384,6144,1536,ck,18,0,13.9746,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,518.64,1055.17,0.0 +gfx950,256,799,8192,512,ck,18,0,13.9767,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,479.55,1265.98,0.0 +gfx950,256,822,8192,512,ck,18,0,13.9769,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,493.35,1293.76,0.0 +gfx950,256,818,8192,512,ck,18,0,13.9791,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,490.87,1288.73,0.0 +gfx950,256,803,8192,512,ck,18,0,13.9808,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,481.81,1270.44,0.0 +gfx950,256,831,8192,512,ck,18,0,13.9834,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,498.51,1304.04,0.0 +gfx950,256,788,8192,512,ck,18,0,13.9898,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,472.5,1251.51,0.0 +gfx950,256,821,8192,512,ck,18,0,13.9902,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,492.28,1291.33,0.0 +gfx950,256,819,8192,512,ck,18,0,13.9996,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,490.75,1288.05,0.0 +gfx950,256,828,8192,512,ck,18,0,14.0074,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,495.86,1298.18,0.0 +gfx950,256,833,8192,512,ck,18,0,14.2164,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,491.52,1285.04,0.0 +gfx950,256,865,8192,512,ck,18,0,14.2478,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,509.28,1320.16,0.0 +gfx950,256,869,8192,512,ck,18,0,14.2522,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,511.48,1324.49,0.0 +gfx950,256,873,8192,512,ck,18,0,14.255,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,513.73,1328.97,0.0 +gfx950,256,889,8192,512,ck,18,0,14.2564,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,523.1,1347.81,0.0 +gfx950,256,895,8192,512,ck,18,0,14.2704,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,526.11,1353.59,0.0 +gfx950,256,882,8192,512,ck,18,0,14.2749,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,518.31,1337.77,0.0 +gfx950,256,858,8192,512,ck,18,0,14.2825,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,503.93,1308.67,0.0 +gfx950,256,855,8192,512,ck,18,0,14.2857,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,502.06,1304.83,0.0 +gfx950,256,880,8192,512,ck,18,0,14.2929,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,516.48,1333.72,0.0 +gfx950,256,866,8192,512,ck,18,0,14.2929,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,508.26,1317.17,0.0 +gfx950,256,872,8192,512,ck,18,0,14.2933,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,511.77,1324.23,0.0 +gfx950,256,846,8192,512,ck,18,0,14.2952,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,496.44,1293.32,0.0 +gfx950,256,863,8192,512,ck,18,0,14.3012,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,506.21,1312.87,0.0 +gfx950,256,839,8192,512,ck,18,0,14.3072,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,491.92,1283.97,0.0 +gfx950,256,842,8192,512,ck,18,0,14.3106,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,493.56,1287.21,0.0 +gfx950,256,894,8192,512,ck,18,0,14.3129,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,523.96,1348.39,0.0 +gfx950,256,870,8192,512,ck,18,0,14.3149,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,509.82,1319.87,0.0 +gfx950,256,852,8192,512,ck,18,0,14.3244,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,498.95,1297.76,0.0 +gfx950,256,850,8192,512,ck,18,0,14.3275,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,497.67,1295.13,0.0 +gfx950,256,892,8192,512,ck,18,0,14.3298,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,522.17,1344.44,0.0 +gfx950,256,861,8192,512,ck,18,0,14.3375,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,503.76,1307.18,0.0 +gfx950,256,847,8192,512,ck,18,0,14.3419,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,495.41,1290.29,0.0 +gfx950,256,272,8192,1536,ck,18,0,14.347,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,477.11,1216.78,0.0 +gfx950,256,890,8192,512,ck,18,0,14.3489,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,520.31,1340.29,0.0 +gfx950,256,875,8192,512,ck,18,0,14.3495,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,511.52,1322.58,0.0 +gfx950,256,867,8192,512,ck,18,0,14.3498,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,506.83,1313.13,0.0 +gfx950,256,860,8192,512,ck,18,0,14.3654,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,502.19,1303.47,0.0 +gfx950,256,859,8192,512,ck,18,0,14.3712,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,501.41,1301.77,0.0 +gfx950,256,400,6144,1536,ck,18,0,14.3732,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,525.27,1041.3,0.0 +gfx950,256,837,8192,512,ck,18,0,14.3742,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,488.46,1275.64,0.0 +gfx950,256,883,8192,512,ck,18,0,14.3771,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,515.2,1329.44,0.0 +gfx950,256,1047,128,7168,ck,8,2,14.3777,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,133.63,604.44,0.0 +gfx950,256,416,6144,1536,ck,18,0,14.3952,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,545.44,1055.07,0.0 +gfx950,256,868,8192,512,ck,18,0,14.3975,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,505.73,1309.95,0.0 +gfx950,256,884,8192,512,ck,18,0,14.4344,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,513.74,1325.33,0.0 +gfx950,256,1003,128,7168,ck,7,2,14.4957,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,126.97,576.98,0.0 +gfx950,256,1265,128,7168,ck,8,2,14.5029,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,160.06,710.81,0.0 +gfx950,256,448,6144,1536,ck,18,0,14.5126,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,582.65,1077.02,0.0 +gfx950,256,1000,128,7168,ck,8,2,14.5397,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,126.21,573.71,0.0 +gfx950,256,304,8192,1536,ck,18,0,14.5481,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,525.87,1239.38,0.0 +gfx950,256,40,7168,4096,ck,18,0,14.5552,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,161.37,2067.81,0.0 +gfx950,256,24,7168,4608,ck,18,1,14.5909,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,108.66,2294.91,0.0 +gfx950,256,464,6144,1536,ck,18,0,14.6645,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,597.2,1080.95,0.0 +gfx950,256,480,6144,1536,ck,18,0,14.6806,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,617.12,1094.83,0.0 +gfx950,256,80,7168,4096,ck,18,0,14.6956,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,319.66,2098.23,0.0 +gfx950,256,72,7168,4096,ck,18,0,14.7442,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,286.75,2081.31,0.0 +gfx950,256,48,7168,4096,ck,18,0,14.7478,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,191.12,2050.81,0.0 +gfx950,256,432,6144,1536,ck,18,0,14.7763,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,551.81,1042.83,0.0 +gfx950,256,104,7168,4096,ck,18,0,14.817,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,412.16,2110.89,0.0 +gfx950,256,88,7168,4096,ck,18,0,14.8317,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,348.4,2088.91,0.0 +gfx950,256,336,8192,1536,ck,18,0,14.8513,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,569.36,1252.69,0.0 +gfx950,256,512,6144,1536,ck,18,0,14.8937,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,648.84,1108.86,0.0 +gfx950,256,496,6144,1536,ck,18,0,14.9121,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,627.79,1092.66,0.0 +gfx950,256,56,7168,4096,ck,18,0,14.92,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,220.4,2037.02,0.0 +gfx950,256,918,8192,512,ck,0,0,14.9961,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,513.52,1314.0,0.0 +gfx950,256,112,7168,4096,ck,18,0,15.0084,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,438.2,2093.79,0.0 +gfx950,256,906,8192,512,ck,0,0,15.011,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,506.3,1299.19,0.0 +gfx950,256,944,8192,512,ck,0,0,15.0113,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,527.53,1341.93,0.0 +gfx950,256,368,8192,1536,ck,18,0,15.0156,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,616.76,1277.17,0.0 +gfx950,256,909,8192,512,ck,0,0,15.0294,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,507.36,1300.97,0.0 +gfx950,256,901,8192,512,ck,0,0,15.0315,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,502.82,1291.79,0.0 +gfx950,256,905,8192,512,ck,0,0,15.0342,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,504.96,1296.06,0.0 +gfx950,256,898,8192,512,ck,0,0,15.0391,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,500.89,1287.77,0.0 +gfx950,256,915,8192,512,ck,0,0,15.0468,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,510.11,1306.2,0.0 +gfx950,256,924,8192,512,ck,0,0,15.0556,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,514.83,1315.54,0.0 +gfx950,256,935,8192,512,ck,0,0,15.0659,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,520.6,1326.97,0.0 +gfx950,256,921,8192,512,ck,0,0,15.0665,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,512.79,1311.22,0.0 +gfx950,256,961,8192,512,ck,0,0,15.0711,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,534.89,1355.66,0.0 +gfx950,256,936,8192,512,ck,0,0,15.0782,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,520.73,1327.01,0.0 +gfx950,256,908,8192,512,ck,0,0,15.0787,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,505.14,1295.59,0.0 +gfx950,256,904,8192,512,ck,0,0,15.0959,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,502.34,1289.64,0.0 +gfx950,256,120,7168,4096,ck,18,0,15.0961,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,466.77,2091.4,0.0 +gfx950,256,934,8192,512,ck,0,0,15.0987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,518.92,1322.97,0.0 +gfx950,256,917,8192,512,ck,0,0,15.1001,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,509.42,1303.83,0.0 +gfx950,256,945,8192,512,ck,0,0,15.1038,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,524.85,1334.83,0.0 +gfx950,256,923,8192,512,ck,0,0,15.1179,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,512.15,1309.0,0.0 +gfx950,256,948,8192,512,ck,0,0,15.1196,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,525.97,1336.79,0.0 +gfx950,256,919,8192,512,ck,0,0,15.1208,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,509.84,1304.28,0.0 +gfx950,256,931,8192,512,ck,0,0,15.1238,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,516.39,1317.43,0.0 +gfx950,256,943,8192,512,ck,0,0,15.1242,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,523.03,1330.8,0.0 +gfx950,256,964,8192,512,ck,0,0,15.1334,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,534.36,1353.43,0.0 +gfx950,256,960,8192,512,ck,0,0,15.1342,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,532.11,1348.9,0.0 +gfx950,256,946,8192,512,ck,0,0,15.1346,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,524.34,1333.23,0.0 +gfx950,256,940,8192,512,ck,0,0,15.1354,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,520.98,1326.46,0.0 +gfx950,256,958,8192,512,ck,0,0,15.1435,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,530.68,1345.84,0.0 +gfx950,256,954,8192,512,ck,0,0,15.1442,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,528.44,1341.31,0.0 +gfx950,256,933,8192,512,ck,0,0,15.1476,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,516.69,1317.59,0.0 +gfx950,256,979,8192,512,ck,0,0,15.1496,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,542.09,1368.72,0.0 +gfx950,256,939,8192,512,ck,0,0,15.1519,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,519.86,1323.9,0.0 +gfx950,256,971,8192,512,ck,0,0,15.1541,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,537.5,1359.39,0.0 +gfx950,256,957,8192,512,ck,0,0,15.1565,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,529.67,1343.57,0.0 +gfx950,256,974,8192,512,ck,0,0,15.1608,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,538.92,1362.13,0.0 +gfx950,256,952,8192,512,ck,0,0,15.162,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,526.71,1337.51,0.0 +gfx950,256,950,8192,512,ck,0,0,15.1662,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,525.46,1334.91,0.0 +gfx950,256,968,8192,512,ck,0,0,15.1749,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,535.11,1354.19,0.0 +gfx950,256,951,8192,512,ck,0,0,15.1769,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,525.64,1335.08,0.0 +gfx950,256,949,8192,512,ck,0,0,15.1829,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,524.33,1332.33,0.0 +gfx950,256,965,8192,512,ck,0,0,15.1938,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,532.78,1349.17,0.0 +gfx950,256,1002,8192,512,ck,0,0,15.208,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,552.69,1389.01,0.0 +gfx950,256,963,8192,512,ck,0,0,15.2173,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,530.86,1344.86,0.0 +gfx950,256,941,8192,512,ck,0,0,15.2189,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,518.68,1320.3,0.0 +gfx950,256,1000,8192,512,ck,0,0,15.2281,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,550.86,1384.96,0.0 +gfx950,256,1006,8192,512,ck,0,0,15.2341,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,553.95,1391.07,0.0 +gfx950,256,981,8192,512,ck,0,0,15.2391,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,540.01,1362.89,0.0 +gfx950,256,999,8192,512,ck,0,0,15.2408,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,549.85,1382.7,0.0 +gfx950,256,996,8192,512,ck,0,0,15.2413,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,548.19,1379.33,0.0 +gfx950,256,992,8192,512,ck,0,0,15.2456,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,545.83,1374.5,0.0 +gfx950,256,1001,8192,512,ck,0,0,15.266,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,550.05,1382.63,0.0 +gfx950,256,993,8192,512,ck,0,0,15.2667,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,545.62,1373.71,0.0 +gfx950,256,1005,8192,512,ck,0,0,15.2696,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,552.11,1386.73,0.0 +gfx950,256,1008,8192,512,ck,0,0,15.2817,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,553.32,1388.95,0.0 +gfx950,256,985,8192,512,ck,0,0,15.2872,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,540.5,1363.03,0.0 +gfx950,256,987,8192,512,ck,0,0,15.3031,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,541.04,1363.82,0.0 +gfx950,256,1087,128,7168,ck,8,2,15.3129,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,130.26,586.92,0.0 +gfx950,256,1015,8192,512,ck,0,0,15.3254,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,555.58,1392.7,0.0 +gfx950,256,1011,8192,512,ck,0,0,15.348,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,552.57,1386.25,0.0 +gfx950,256,1019,8192,512,ck,0,0,15.36,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,556.51,1393.97,0.0 +gfx950,256,1012,8192,512,ck,0,0,15.3609,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,552.65,1386.19,0.0 +gfx950,256,1010,8192,512,ck,0,0,15.3883,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,550.58,1381.52,0.0 +gfx950,256,1040,128,7168,ck,18,2,15.4171,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,123.79,560.32,0.0 +gfx950,256,40,7168,4608,ck,18,0,15.6778,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,168.54,2155.14,0.0 +gfx950,256,48,7168,4608,ck,18,0,15.8779,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,199.7,2137.53,0.0 +gfx950,256,56,7168,4608,ck,18,0,15.9613,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,231.77,2135.85,0.0 +gfx950,256,72,7168,4608,ck,18,0,15.9709,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,297.81,2153.55,0.0 +gfx950,256,80,7168,4608,ck,18,0,15.9936,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,330.43,2159.97,0.0 +gfx950,256,1255,128,7168,ck,8,2,16.0425,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,143.55,637.97,0.0 +gfx950,256,400,8192,1536,ck,0,0,16.0698,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,626.41,1229.07,0.0 +gfx950,256,88,7168,4608,ck,18,0,16.0921,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,361.25,2156.16,0.0 +gfx950,256,104,7168,4608,ck,18,0,16.1548,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,425.28,2166.56,0.0 +gfx950,256,432,8192,1536,ck,0,0,16.2041,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,670.92,1254.27,0.0 +gfx950,256,112,7168,4608,ck,18,0,16.2213,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,456.11,2167.02,0.0 +gfx950,256,120,7168,4608,ck,18,0,16.37,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,484.25,2156.59,0.0 +gfx950,256,464,8192,1536,ck,0,0,16.4194,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.17,1272.75,0.0 +gfx950,256,1047,8192,512,ck,18,0,16.5099,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,531.98,1325.53,0.0 +gfx950,256,1029,8192,512,ck,18,0,16.5528,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,521.48,1303.72,0.0 +gfx950,256,496,8192,1536,ck,0,0,16.5654,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.51,1296.15,0.0 +gfx950,256,1084,8192,512,ck,18,0,16.5732,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,548.67,1358.19,0.0 +gfx950,256,1036,8192,512,ck,18,0,16.5756,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,524.3,1309.07,0.0 +gfx950,256,1038,8192,512,ck,18,0,16.6079,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,524.29,1308.56,0.0 +gfx950,256,1046,8192,512,ck,18,0,16.6095,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,528.28,1316.57,0.0 +gfx950,256,1037,8192,512,ck,18,0,16.6122,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,523.65,1307.2,0.0 +gfx950,256,1085,8192,512,ck,18,0,16.6664,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,546.11,1351.61,0.0 +gfx950,256,1053,8192,512,ck,18,0,16.6719,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,529.83,1318.73,0.0 +gfx950,256,1040,8192,512,ck,18,0,16.6732,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,523.24,1305.46,0.0 +gfx950,256,1048,8192,512,ck,18,0,16.6771,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,527.15,1313.26,0.0 +gfx950,256,1035,8192,512,ck,18,0,16.6791,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,520.54,1299.93,0.0 +gfx950,256,1042,8192,512,ck,18,0,16.6857,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,523.86,1306.5,0.0 +gfx950,256,1061,8192,512,ck,18,0,16.6878,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,533.34,1325.58,0.0 +gfx950,256,1056,8192,512,ck,18,0,16.6882,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,530.82,1320.48,0.0 +gfx950,256,621,6144,1536,ck,0,0,16.6961,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.02,1079.41,0.0 +gfx950,256,1080,8192,512,ck,18,0,16.6993,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,542.52,1343.89,0.0 +gfx950,256,1050,8192,512,ck,18,0,16.7077,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,527.18,1312.87,0.0 +gfx950,256,1063,8192,512,ck,18,0,16.7162,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,533.44,1325.35,0.0 +gfx950,256,1072,8192,512,ck,18,0,16.722,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,537.77,1333.98,0.0 +gfx950,256,1069,8192,512,ck,18,0,16.7238,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,536.21,1330.81,0.0 +gfx950,256,1076,8192,512,ck,18,0,16.7258,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,539.65,1337.72,0.0 +gfx950,256,1073,8192,512,ck,18,0,16.7266,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,538.12,1334.62,0.0 +gfx950,256,1051,8192,512,ck,18,0,16.7292,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,527.01,1312.2,0.0 +gfx950,256,1075,8192,512,ck,18,0,16.7678,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,537.8,1333.36,0.0 +gfx950,256,1071,8192,512,ck,18,0,16.7722,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,535.66,1328.98,0.0 +gfx950,256,1116,8192,512,ck,18,0,16.9507,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,552.29,1359.84,0.0 +gfx950,256,1106,8192,512,ck,18,0,17.0223,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,545.04,1344.19,0.0 +gfx950,256,1639,128,7168,ck,8,2,17.0356,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,176.55,768.12,0.0 +gfx950,256,1098,8192,512,ck,18,0,17.0579,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,539.97,1333.46,0.0 +gfx950,256,1124,8192,512,ck,18,0,17.0641,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,552.55,1358.72,0.0 +gfx950,256,1120,8192,512,ck,18,0,17.0775,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,550.15,1353.7,0.0 +gfx950,256,1140,8192,512,ck,18,0,17.0808,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,559.87,1373.22,0.0 +gfx950,256,1099,8192,512,ck,18,0,17.0955,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,539.27,1331.52,0.0 +gfx950,256,1091,8192,512,ck,18,0,17.1013,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,535.16,1323.16,0.0 +gfx950,256,1126,8192,512,ck,18,0,17.1151,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,551.89,1356.65,0.0 +gfx950,256,1115,8192,512,ck,18,0,17.1151,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,546.49,1345.79,0.0 +gfx950,256,1108,8192,512,ck,18,0,17.1204,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,542.89,1338.47,0.0 +gfx950,256,1150,8192,512,ck,18,0,17.1216,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,563.43,1379.82,0.0 +gfx950,256,1142,8192,512,ck,18,0,17.1237,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,559.45,1371.76,0.0 +gfx950,256,1149,8192,512,ck,18,0,17.1312,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,562.63,1378.06,0.0 +gfx950,256,1136,8192,512,ck,18,0,17.1323,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,556.23,1365.15,0.0 +gfx950,256,1122,8192,512,ck,18,0,17.1381,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,549.19,1350.89,0.0 +gfx950,256,1144,8192,512,ck,18,0,17.16,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,559.24,1370.82,0.0 +gfx950,256,1134,8192,512,ck,18,0,17.1721,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,553.96,1360.02,0.0 +gfx950,256,1097,8192,512,ck,18,0,17.1884,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,535.38,1322.36,0.0 +gfx950,256,1685,128,7168,ck,8,2,17.2265,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,179.49,779.44,0.0 +gfx950,256,1808,128,7168,ck,8,2,17.2811,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,191.98,829.81,0.0 +gfx950,256,1718,128,7168,ck,8,2,17.3262,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,181.95,789.09,0.0 +gfx950,256,1809,128,7168,ck,8,2,17.4448,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,190.29,822.45,0.0 +gfx950,256,1816,128,7168,ck,8,2,17.4451,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,191.02,825.42,0.0 +gfx950,256,2000,128,7168,ck,8,2,17.473,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,210.04,902.28,0.0 +gfx950,256,1821,128,7168,ck,8,2,17.4856,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,191.1,825.63,0.0 +gfx950,256,1710,128,7168,ck,18,2,17.7075,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,177.21,768.74,0.0 +gfx950,256,2006,128,7168,ck,8,2,17.7099,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,207.85,892.72,0.0 +gfx950,256,2037,128,7168,ck,8,2,17.744,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,210.66,903.98,0.0 +gfx950,256,1172,8192,512,ck,18,0,17.9265,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,548.43,1338.6,0.0 +gfx950,256,1173,8192,512,ck,18,0,17.9507,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,548.16,1337.74,0.0 +gfx950,256,1177,8192,512,ck,18,0,17.9877,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,548.9,1338.74,0.0 +gfx950,256,1934,128,7168,ck,8,1,17.9972,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,197.19,848.77,0.0 +gfx950,256,1213,8192,512,ck,18,0,18.0063,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,565.1,1371.14,0.0 +gfx950,256,1167,8192,512,ck,18,0,18.0404,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,542.64,1325.47,0.0 +gfx950,256,1169,8192,512,ck,18,0,18.0502,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,543.28,1326.62,0.0 +gfx950,256,1155,8192,512,ck,18,0,18.0561,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,536.6,1313.08,0.0 +gfx950,256,1208,8192,512,ck,18,0,18.0612,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,561.06,1362.29,0.0 +gfx950,256,1181,8192,512,ck,18,0,18.0659,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,548.38,1336.69,0.0 +gfx950,256,1159,8192,512,ck,18,0,18.0706,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,538.02,1315.77,0.0 +gfx950,256,1211,8192,512,ck,18,0,18.0725,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,562.1,1364.25,0.0 +gfx950,256,1179,8192,512,ck,18,0,18.0861,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,546.84,1333.33,0.0 +gfx950,256,1161,8192,512,ck,18,0,18.0961,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,538.19,1315.78,0.0 +gfx950,256,1180,8192,512,ck,18,0,18.1213,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,546.24,1331.67,0.0 +gfx950,256,1210,8192,512,ck,18,0,18.1234,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,560.06,1359.48,0.0 +gfx950,256,1153,8192,512,ck,18,0,18.1289,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,533.52,1305.95,0.0 +gfx950,256,1176,8192,512,ck,18,0,18.1332,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,544.03,1327.07,0.0 +gfx950,256,1206,8192,512,ck,18,0,18.1335,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,557.9,1355.0,0.0 +gfx950,256,1164,8192,512,ck,18,0,18.1354,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,538.41,1315.73,0.0 +gfx950,256,1259,8192,512,ck,18,0,18.1413,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,582.17,1403.78,0.0 +gfx950,256,1188,8192,512,ck,18,0,18.1549,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,548.92,1336.65,0.0 +gfx950,256,1170,8192,512,ck,18,0,18.1604,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,540.44,1319.5,0.0 +gfx950,256,1215,8192,512,ck,18,0,18.1689,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,560.97,1360.73,0.0 +gfx950,256,1203,8192,512,ck,18,0,18.1738,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,555.28,1349.21,0.0 +gfx950,256,1260,8192,512,ck,18,0,18.1834,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,581.28,1401.46,0.0 +gfx950,256,1279,8192,512,ck,18,0,18.2126,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,589.1,1416.84,0.0 +gfx950,256,1817,128,7168,ck,8,2,18.2156,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,183.04,790.91,0.0 +gfx950,256,1197,8192,512,ck,18,0,18.2216,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,551.06,1340.1,0.0 +gfx950,256,1168,8192,512,ck,18,0,18.2396,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,537.18,1311.92,0.0 +gfx950,256,1192,8192,512,ck,18,0,18.2452,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,548.05,1333.74,0.0 +gfx950,256,1253,8192,512,ck,18,0,18.2547,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,575.79,1389.5,0.0 +gfx950,256,1207,8192,512,ck,18,0,18.2645,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,554.36,1346.21,0.0 +gfx950,256,1261,8192,512,ck,18,0,18.2689,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,579.02,1395.82,0.0 +gfx950,256,1221,8192,512,ck,18,0,18.2862,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,560.12,1357.54,0.0 +gfx950,256,1234,8192,512,ck,18,0,18.3146,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,565.21,1367.43,0.0 +gfx950,256,1247,8192,512,ck,18,0,18.3178,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,571.06,1379.18,0.0 +gfx950,256,1246,8192,512,ck,18,0,18.3179,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,570.6,1378.25,0.0 +gfx950,256,1222,8192,512,ck,18,0,18.3179,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,559.61,1356.12,0.0 +gfx950,256,1255,8192,512,ck,18,0,18.3209,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,574.63,1386.33,0.0 +gfx950,256,1271,8192,512,ck,18,0,18.3209,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,581.95,1401.08,0.0 +gfx950,256,1244,8192,512,ck,18,0,18.3252,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,569.46,1375.86,0.0 +gfx950,256,1249,8192,512,ck,18,0,18.334,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,571.47,1379.81,0.0 +gfx950,256,1218,8192,512,ck,18,0,18.3633,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,556.4,1349.08,0.0 +gfx950,256,1241,8192,512,ck,18,0,18.3673,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,566.78,1369.95,0.0 +gfx950,256,1225,8192,512,ck,18,0,18.3872,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,558.87,1353.76,0.0 +gfx950,256,1231,8192,512,ck,18,0,18.3947,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,561.38,1358.72,0.0 +gfx950,256,1227,8192,512,ck,18,0,18.3947,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,559.55,1355.05,0.0 +gfx950,256,1252,8192,512,ck,18,0,18.3963,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,570.9,1377.89,0.0 +gfx950,256,1265,8192,512,ck,18,0,18.3983,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,576.77,1389.68,0.0 +gfx950,256,1256,8192,512,ck,18,0,18.4074,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,572.38,1380.73,0.0 +gfx950,256,1266,8192,512,ck,18,0,18.4107,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,576.84,1389.66,0.0 +gfx950,256,1229,8192,512,ck,18,0,18.4204,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,559.68,1354.99,0.0 +gfx950,256,1264,8192,512,ck,18,0,18.4213,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,575.59,1387.03,0.0 +gfx950,256,1262,8192,512,ck,18,0,18.4335,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,574.3,1384.28,0.0 +gfx950,256,1267,8192,512,ck,18,0,18.4391,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,576.4,1388.44,0.0 +gfx950,256,1268,8192,512,ck,18,0,18.4662,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,576.01,1387.31,0.0 +gfx950,256,1220,8192,512,ck,18,0,18.4692,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,554.12,1343.18,0.0 +gfx950,256,1275,8192,512,ck,18,0,18.483,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,578.67,1392.45,0.0 +gfx950,256,1263,8192,512,ck,18,0,18.5018,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,572.64,1380.08,0.0 +gfx950,256,1224,8192,512,ck,18,0,18.516,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,554.53,1343.43,0.0 +gfx950,256,1236,8192,512,ck,18,0,18.6206,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,556.82,1346.78,0.0 +gfx950,256,1314,8192,512,ck,18,0,19.1436,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,575.79,1378.82,0.0 +gfx950,256,1331,8192,512,ck,18,0,19.1614,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,582.69,1392.53,0.0 +gfx950,256,1311,8192,512,ck,18,0,19.1825,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,573.31,1373.39,0.0 +gfx950,256,1329,8192,512,ck,18,0,19.2281,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,579.8,1385.94,0.0 +gfx950,256,1337,8192,512,ck,18,0,19.2404,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,582.92,1392.08,0.0 +gfx950,256,1308,8192,512,ck,18,0,19.2531,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,569.9,1365.72,0.0 +gfx950,256,1348,8192,512,ck,18,0,19.2721,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,586.75,1399.44,0.0 +gfx950,256,1316,8192,512,ck,18,0,19.3069,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,571.79,1368.91,0.0 +gfx950,256,1289,8192,512,ck,18,0,19.32,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,559.67,1344.37,0.0 +gfx950,256,1338,8192,512,ck,18,0,19.3242,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,580.82,1386.92,0.0 +gfx950,256,2056,128,7168,ck,8,2,19.3377,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,195.1,836.77,0.0 +gfx950,256,1341,8192,512,ck,18,0,19.3695,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,580.76,1386.29,0.0 +gfx950,256,1365,8192,512,ck,18,0,19.3754,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,590.98,1406.8,0.0 +gfx950,256,1323,8192,512,ck,18,0,19.3785,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,572.7,1369.96,0.0 +gfx950,256,1301,8192,512,ck,18,0,19.3857,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,562.97,1350.27,0.0 +gfx950,256,1352,8192,512,ck,18,0,19.3875,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,584.99,1394.59,0.0 +gfx950,256,1361,8192,512,ck,18,0,19.3879,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,588.87,1402.41,0.0 +gfx950,256,1295,8192,512,ck,18,0,19.3899,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,560.25,1344.75,0.0 +gfx950,256,1359,8192,512,ck,18,0,19.395,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,587.79,1400.15,0.0 +gfx950,256,1297,8192,512,ck,18,0,19.4036,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,560.72,1345.54,0.0 +gfx950,256,1376,8192,512,ck,18,0,19.4067,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,594.78,1414.11,0.0 +gfx950,256,1343,8192,512,ck,18,0,19.415,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,580.27,1384.79,0.0 +gfx950,256,1302,8192,512,ck,18,0,19.4165,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,562.51,1349.0,0.0 +gfx950,256,1381,8192,512,ck,18,0,19.4194,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,596.55,1417.54,0.0 +gfx950,256,1312,8192,512,ck,18,0,19.4198,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,566.73,1357.47,0.0 +gfx950,256,1358,8192,512,ck,18,0,19.422,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,586.54,1397.34,0.0 +gfx950,256,2054,128,7168,ck,8,2,19.4223,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,194.06,832.36,0.0 +gfx950,256,1292,8192,512,ck,18,0,19.4226,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,558.01,1339.88,0.0 +gfx950,256,1309,8192,512,ck,18,0,19.4228,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,565.35,1354.65,0.0 +gfx950,256,1968,128,7168,ck,8,0,19.4303,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,185.86,799.16,0.0 +gfx950,256,1396,8192,512,ck,18,0,19.4577,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,601.84,1427.77,0.0 +gfx950,256,1300,8192,512,ck,18,0,19.47,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,560.1,1343.56,0.0 +gfx950,256,1326,8192,512,ck,18,0,19.4792,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,571.03,1365.48,0.0 +gfx950,256,1388,8192,512,ck,18,0,19.4808,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,597.69,1419.14,0.0 +gfx950,256,1294,8192,512,ck,18,0,19.483,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,557.15,1337.46,0.0 +gfx950,256,1345,8192,512,ck,18,0,19.5021,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,578.54,1380.33,0.0 +gfx950,256,2032,128,7168,ck,8,0,19.5057,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,191.16,820.43,0.0 +gfx950,256,2085,128,7168,ck,8,2,19.5115,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,196.09,840.35,0.0 +gfx950,256,1354,8192,512,ck,18,0,19.5117,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,582.12,1387.45,0.0 +gfx950,256,1379,8192,512,ck,18,0,19.5212,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,592.58,1408.41,0.0 +gfx950,256,2087,128,7168,ck,8,2,19.5215,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,196.18,840.68,0.0 +gfx950,256,1310,8192,512,ck,18,0,19.5216,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,562.92,1348.66,0.0 +gfx950,256,1347,8192,512,ck,18,0,19.5239,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,578.75,1380.52,0.0 +gfx950,256,1382,8192,512,ck,18,0,19.5254,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,593.74,1410.7,0.0 +gfx950,256,1380,8192,512,ck,18,0,19.5277,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,592.81,1408.81,0.0 +gfx950,256,1393,8192,512,ck,18,0,19.541,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,597.99,1419.09,0.0 +gfx950,256,1395,8192,512,ck,18,0,19.5658,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,598.09,1419.02,0.0 +gfx950,256,1374,8192,512,ck,18,0,19.5691,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,588.99,1400.65,0.0 +gfx950,256,1404,8192,512,ck,18,0,19.5746,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,601.68,1426.15,0.0 +gfx950,256,1403,8192,512,ck,18,0,19.5798,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,601.09,1424.91,0.0 +gfx950,256,1303,8192,512,ck,18,0,19.5806,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,558.22,1338.56,0.0 +gfx950,256,1377,8192,512,ck,18,0,19.5818,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,589.89,1402.33,0.0 +gfx950,256,2109,128,7168,ck,8,2,19.5891,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,197.56,846.12,0.0 +gfx950,256,1360,8192,512,ck,18,0,19.5917,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,582.31,1386.96,0.0 +gfx950,256,1399,8192,512,ck,18,0,19.5948,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,598.92,1420.37,0.0 +gfx950,256,1367,8192,512,ck,18,0,19.5965,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,585.17,1392.65,0.0 +gfx950,256,2091,128,7168,ck,8,2,19.5991,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,195.77,838.87,0.0 +gfx950,256,1406,8192,512,ck,18,0,19.603,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,601.66,1425.81,0.0 +gfx950,256,1385,8192,512,ck,18,0,19.6048,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,592.62,1407.58,0.0 +gfx950,256,1401,8192,512,ck,18,0,19.6056,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,599.44,1421.31,0.0 +gfx950,256,1375,8192,512,ck,18,0,19.6058,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,588.31,1398.89,0.0 +gfx950,256,1383,8192,512,ck,18,0,19.6186,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,591.35,1404.86,0.0 +gfx950,256,2128,128,7168,ck,8,2,19.6263,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,198.96,851.7,0.0 +gfx950,256,1384,8192,512,ck,18,0,19.63,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,591.43,1404.91,0.0 +gfx950,256,1369,8192,512,ck,18,0,19.6362,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,584.84,1391.56,0.0 +gfx950,256,1371,8192,512,ck,18,0,19.6539,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,585.17,1392.02,0.0 +gfx950,256,1351,8192,512,ck,18,0,19.6804,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,575.85,1372.98,0.0 +gfx950,256,1397,8192,512,ck,18,0,19.7292,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,593.99,1408.98,0.0 +gfx950,256,2112,128,7168,ck,18,1,19.7591,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,196.14,839.97,0.0 +gfx950,256,1344,8192,512,ck,18,0,19.8271,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,568.63,1356.86,0.0 +gfx950,256,2160,128,7168,ck,8,2,19.8358,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,199.82,854.68,0.0 +gfx950,256,1387,8192,512,ck,18,0,19.8528,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,586.06,1391.7,0.0 +gfx950,256,2166,128,7168,ck,8,2,19.8745,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,199.99,855.26,0.0 +gfx950,256,2161,128,7168,ck,8,2,19.901,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,199.26,852.26,0.0 +gfx950,256,2088,128,7168,ck,8,2,19.9122,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,192.42,824.56,0.0 +gfx950,256,1469,8192,512,ck,0,0,19.939,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,618.03,1455.16,0.0 +gfx950,256,2127,128,7168,ck,18,1,19.9431,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,195.71,837.8,0.0 +gfx950,256,2129,128,7168,ck,18,1,19.9487,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,195.84,838.31,0.0 +gfx950,256,1428,8192,512,ck,0,0,19.951,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,600.42,1419.57,0.0 +gfx950,256,1531,8192,512,ck,0,0,19.9532,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,643.65,1506.63,0.0 +gfx950,256,2086,128,7168,ck,18,1,19.9784,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,191.6,821.09,0.0 +gfx950,256,1500,8192,512,ck,0,0,19.9813,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,629.73,1478.3,0.0 +gfx950,256,2111,128,7168,ck,18,1,19.9929,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,193.75,829.77,0.0 +gfx950,256,1490,8192,512,ck,0,0,19.995,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,625.11,1468.83,0.0 +gfx950,256,2149,128,7168,ck,8,2,19.9982,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,197.19,843.66,0.0 +gfx950,256,2130,128,7168,ck,18,1,19.9987,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,195.44,836.59,0.0 +gfx950,256,2188,128,7168,ck,18,1,19.9991,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,200.76,858.1,0.0 +gfx950,256,1417,8192,512,ck,0,0,20.0008,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,594.31,1406.74,0.0 +gfx950,256,2186,128,7168,ck,18,1,20.0035,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,200.53,857.17,0.0 +gfx950,256,2185,128,7168,ck,18,1,20.0048,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,200.43,856.74,0.0 +gfx950,256,1476,8192,512,ck,0,0,20.0183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,618.51,1455.31,0.0 +gfx950,256,1506,8192,512,ck,0,0,20.0233,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,630.93,1480.26,0.0 +gfx950,256,1450,8192,512,ck,0,0,20.0256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,607.4,1432.84,0.0 +gfx950,256,1425,8192,512,ck,0,0,20.0264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,596.9,1411.69,0.0 +gfx950,256,1461,8192,512,ck,0,0,20.0286,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,611.91,1441.91,0.0 +gfx950,256,1431,8192,512,ck,0,0,20.0314,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,599.26,1416.4,0.0 +gfx950,256,2163,128,7168,ck,8,2,20.0339,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,198.12,847.34,0.0 +gfx950,256,1459,8192,512,ck,0,0,20.06,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,610.12,1437.96,0.0 +gfx950,256,2146,128,7168,ck,8,2,20.0645,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,196.26,839.76,0.0 +gfx950,256,1514,8192,512,ck,0,0,20.0665,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,632.91,1483.81,0.0 +gfx950,256,2249,128,7168,ck,8,2,20.0678,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,205.65,877.73,0.0 +gfx950,256,1430,8192,512,ck,0,0,20.0699,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,597.7,1412.84,0.0 +gfx950,256,2189,128,7168,ck,8,2,20.0701,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,200.14,855.43,0.0 +gfx950,256,1410,8192,512,ck,0,0,20.0705,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,589.32,1395.96,0.0 +gfx950,256,1511,8192,512,ck,0,0,20.0762,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,631.35,1480.57,0.0 +gfx950,256,1437,8192,512,ck,0,0,20.0796,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,600.33,1418.05,0.0 +gfx950,256,1498,8192,512,ck,0,0,20.0809,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,625.78,1469.28,0.0 +gfx950,256,1471,8192,512,ck,0,0,20.0902,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,614.21,1445.9,0.0 +gfx950,256,1423,8192,512,ck,0,0,20.0917,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,594.13,1405.42,0.0 +gfx950,256,1496,8192,512,ck,0,0,20.0971,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,624.44,1466.42,0.0 +gfx950,256,1454,8192,512,ck,0,0,20.1064,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,606.62,1430.44,0.0 +gfx950,256,1435,8192,512,ck,0,0,20.1075,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,598.66,1414.4,0.0 +gfx950,256,1497,8192,512,ck,0,0,20.1109,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,624.42,1466.25,0.0 +gfx950,256,1485,128,7168,ck,8,0,20.1264,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,135.39,593.36,0.0 +gfx950,256,2164,128,7168,ck,18,1,20.1278,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,197.29,843.76,0.0 +gfx950,256,1444,8192,512,ck,0,0,20.128,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,601.81,1420.52,0.0 +gfx950,256,1452,8192,512,ck,0,0,20.1296,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,605.09,1427.12,0.0 +gfx950,256,2248,128,7168,ck,18,1,20.1354,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,204.87,874.41,0.0 +gfx950,256,1521,8192,512,ck,0,0,20.1357,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,633.65,1484.58,0.0 +gfx950,256,2219,128,7168,ck,8,2,20.1366,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,202.21,863.67,0.0 +gfx950,256,1519,8192,512,ck,0,0,20.1394,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,632.7,1482.63,0.0 +gfx950,256,1495,8192,512,ck,0,0,20.1439,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,622.57,1462.17,0.0 +gfx950,256,1447,8192,512,ck,0,0,20.1446,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,602.56,1421.86,0.0 +gfx950,256,1482,8192,512,ck,0,0,20.1466,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,617.07,1451.07,0.0 +gfx950,256,2215,128,7168,ck,8,2,20.1496,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,201.72,861.64,0.0 +gfx950,256,1492,8192,512,ck,0,0,20.15,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,621.13,1459.21,0.0 +gfx950,256,1441,8192,512,ck,0,0,20.1574,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,599.68,1415.93,0.0 +gfx950,256,2368,128,7168,ck,18,1,20.1605,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,215.54,917.51,0.0 +gfx950,256,1503,8192,512,ck,0,0,20.1612,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,625.36,1467.62,0.0 +gfx950,256,1333,128,7168,ck,8,0,20.164,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,121.31,536.29,0.0 +gfx950,256,1467,8192,512,ck,0,0,20.1649,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,610.27,1437.19,0.0 +gfx950,256,1502,8192,512,ck,0,0,20.1734,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,624.57,1465.9,0.0 +gfx950,256,1516,8192,512,ck,0,0,20.1735,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,630.39,1477.61,0.0 +gfx950,256,2207,128,7168,ck,8,2,20.1782,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,200.7,857.47,0.0 +gfx950,256,2150,128,7168,ck,18,1,20.1784,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,195.52,836.49,0.0 +gfx950,256,2268,128,7168,ck,8,2,20.1793,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,206.24,879.87,0.0 +gfx950,256,2226,128,7168,ck,8,2,20.1794,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,202.42,864.41,0.0 +gfx950,256,1432,8192,512,ck,0,0,20.1796,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,595.28,1406.84,0.0 +gfx950,256,1493,8192,512,ck,0,0,20.1802,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,620.62,1457.87,0.0 +gfx950,256,1499,8192,512,ck,0,0,20.1871,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,622.9,1462.39,0.0 +gfx950,256,1517,128,7168,ck,8,0,20.1873,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,137.89,603.34,0.0 +gfx950,256,1477,8192,512,ck,0,0,20.1916,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,613.62,1443.65,0.0 +gfx950,256,1601,128,7168,ck,8,0,20.1922,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,145.49,634.07,0.0 +gfx950,256,1424,8192,512,ck,0,0,20.1962,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,591.47,1398.99,0.0 +gfx950,256,1443,8192,512,ck,0,0,20.1997,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,599.25,1414.64,0.0 +gfx950,256,1442,8192,512,ck,0,0,20.2143,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,598.41,1412.78,0.0 +gfx950,256,1490,128,7168,ck,8,0,20.2159,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,135.25,592.57,0.0 +gfx950,256,2301,128,7168,ck,8,2,20.216,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,208.86,890.39,0.0 +gfx950,256,2252,128,7168,ck,18,1,20.2224,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,204.35,872.12,0.0 +gfx950,256,1523,8192,512,ck,0,0,20.2263,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,631.65,1479.6,0.0 +gfx950,256,2216,128,7168,ck,8,2,20.2364,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,200.94,858.31,0.0 +gfx950,256,2344,128,7168,ck,8,2,20.2444,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,212.47,904.91,0.0 +gfx950,256,1533,8192,512,ck,0,0,20.2481,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,635.11,1486.36,0.0 +gfx950,256,2251,128,7168,ck,8,2,20.2502,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,203.98,870.56,0.0 +gfx950,256,1590,128,7168,ck,8,0,20.2529,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,144.06,628.14,0.0 +gfx950,256,2090,128,7168,ck,8,2,20.2749,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,189.16,810.54,0.0 +gfx950,256,1599,128,7168,ck,8,0,20.2964,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,144.57,630.09,0.0 +gfx950,256,2387,128,7168,ck,18,1,20.3065,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,215.7,917.86,0.0 +gfx950,256,813,128,7168,ck,8,0,20.3352,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,73.36,341.93,0.0 +gfx950,256,2356,128,7168,ck,8,2,20.3441,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,212.51,904.85,0.0 +gfx950,256,2412,128,7168,ck,8,2,20.3586,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,217.4,924.63,0.0 +gfx950,256,2390,128,7168,ck,18,1,20.3747,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,215.25,915.88,0.0 +gfx950,256,1580,128,7168,ck,8,0,20.3766,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,142.29,620.68,0.0 +gfx950,256,809,128,7168,ck,8,0,20.3796,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,72.84,339.73,0.0 +gfx950,256,849,128,7168,ck,8,0,20.3835,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,76.43,354.23,0.0 +gfx950,256,869,128,7168,ck,8,0,20.398,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,78.18,361.26,0.0 +gfx950,256,1334,128,7168,ck,8,0,20.4019,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,119.98,530.4,0.0 +gfx950,256,2358,128,7168,ck,18,1,20.4156,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,211.94,902.41,0.0 +gfx950,256,1553,128,7168,ck,8,0,20.4172,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,139.58,609.63,0.0 +gfx950,256,1623,128,7168,ck,8,0,20.419,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,145.86,635.03,0.0 +gfx950,256,2254,128,7168,ck,18,1,20.422,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,202.53,864.32,0.0 +gfx950,256,1944,128,7168,ck,8,0,20.4354,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,174.56,751.14,0.0 +gfx950,256,920,128,7168,ck,8,0,20.4416,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,82.59,379.01,0.0 +gfx950,256,806,128,7168,ck,8,0,20.4458,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,72.34,337.54,0.0 +gfx950,256,2369,128,7168,ck,8,2,20.4463,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,212.61,905.05,0.0 +gfx950,256,1987,128,7168,ck,8,0,20.4485,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,178.31,766.27,0.0 +gfx950,256,2378,128,7168,ck,8,2,20.4532,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,213.35,908.01,0.0 +gfx950,256,1548,128,7168,ck,8,0,20.4533,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,138.88,606.74,0.0 +gfx950,256,2415,128,7168,ck,18,1,20.4563,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,216.63,921.3,0.0 +gfx950,256,2002,128,7168,ck,8,0,20.4731,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,179.44,770.78,0.0 +gfx950,256,2101,128,7168,ck,8,2,20.4764,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,188.28,806.55,0.0 +gfx950,256,910,128,7168,ck,8,0,20.4801,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,81.54,374.67,0.0 +gfx950,256,1978,128,7168,ck,8,0,20.4845,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,177.19,761.66,0.0 +gfx950,256,1185,128,7168,ck,8,0,20.4892,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,106.13,474.15,0.0 +gfx950,256,2041,128,7168,ck,8,0,20.5033,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,182.67,783.77,0.0 +gfx950,256,2403,128,7168,ck,18,1,20.5123,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,214.97,914.45,0.0 +gfx950,256,2012,128,7168,ck,8,0,20.513,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,179.99,772.9,0.0 +gfx950,256,677,6144,1536,ck,18,0,20.5232,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,622.61,915.84,0.0 +gfx950,256,2004,128,7168,ck,8,0,20.5297,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,179.12,769.38,0.0 +gfx950,256,2009,128,7168,ck,8,0,20.5334,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,179.54,771.05,0.0 +gfx950,256,2035,128,7168,ck,8,0,20.5359,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,181.84,780.36,0.0 +gfx950,256,1991,128,7168,ck,8,0,20.5415,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,177.86,764.24,0.0 +gfx950,256,930,128,7168,ck,8,0,20.586,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,82.9,379.96,0.0 +gfx950,256,1144,128,7168,ck,8,0,20.59,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,101.95,457.05,0.0 +gfx950,256,2003,128,7168,ck,8,0,20.5916,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,178.5,766.71,0.0 +gfx950,256,814,128,7168,ck,8,0,20.5946,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,72.53,337.98,0.0 +gfx950,256,2011,128,7168,ck,8,0,20.605,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,179.09,769.09,0.0 +gfx950,256,2445,128,7168,ck,18,1,20.6066,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,217.73,925.39,0.0 +gfx950,256,1112,128,7168,ck,8,0,20.61,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,99.01,445.07,0.0 +gfx950,256,2443,128,7168,ck,8,2,20.6252,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,217.35,923.84,0.0 +gfx950,256,1980,128,7168,ck,8,0,20.6374,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,176.05,756.73,0.0 +gfx950,256,2567,128,7168,ck,18,1,20.655,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,228.05,967.07,0.0 +gfx950,256,902,128,7168,ck,8,0,20.658,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,80.12,368.57,0.0 +gfx950,256,2479,128,7168,ck,18,1,20.6791,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,219.98,934.35,0.0 +gfx950,256,2486,128,7168,ck,18,1,20.6923,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,220.46,936.27,0.0 +gfx950,256,2126,128,7168,ck,8,2,20.7011,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,188.46,806.77,0.0 +gfx950,256,2217,128,7168,ck,8,2,20.7018,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,196.51,839.37,0.0 +gfx950,256,2597,128,7168,ck,18,1,20.7517,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,229.64,973.3,0.0 +gfx950,256,2441,128,7168,ck,8,2,20.7532,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,215.83,917.42,0.0 +gfx950,256,2148,128,7168,ck,8,2,20.7796,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,189.69,811.58,0.0 +gfx950,256,1132,128,7168,ck,8,0,20.7832,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,99.95,448.51,0.0 +gfx950,256,877,128,7168,ck,8,0,20.7832,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,77.43,357.42,0.0 +gfx950,256,2250,128,7168,ck,18,1,20.7889,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,198.6,847.64,0.0 +gfx950,256,2515,128,7168,ck,18,1,20.7906,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,221.98,942.2,0.0 +gfx950,256,2548,128,7168,ck,18,1,20.8182,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,224.59,952.72,0.0 +gfx950,256,2658,128,7168,ck,18,1,20.9837,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,232.44,984.12,0.0 +gfx950,256,136,7168,4096,ck,18,0,20.9979,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,380.32,1517.62,0.0 +gfx950,256,144,7168,4096,ck,18,0,21.0657,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,401.4,1519.74,0.0 +gfx950,256,200,7168,4096,ck,18,0,21.0827,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,557.05,1567.47,0.0 +gfx950,256,2282,128,7168,ck,8,2,21.0858,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,198.59,846.97,0.0 +gfx950,256,2713,128,7168,ck,18,1,21.0858,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,236.1,998.72,0.0 +gfx950,256,2534,128,7168,ck,8,2,21.1044,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,220.33,934.87,0.0 +gfx950,256,734,6144,1536,ck,18,0,21.1349,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,655.49,926.62,0.0 +gfx950,256,2355,128,7168,ck,18,1,21.1562,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,204.26,869.77,0.0 +gfx950,256,2544,128,7168,ck,18,1,21.1944,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,220.26,934.41,0.0 +gfx950,256,152,7168,4096,ck,18,0,21.2133,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,420.75,1516.11,0.0 +gfx950,256,2794,128,7168,ck,18,1,21.2593,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,241.17,1018.86,0.0 +gfx950,256,2464,128,7168,ck,8,2,21.2955,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,212.32,902.08,0.0 +gfx950,256,2365,128,7168,ck,18,1,21.3173,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,203.58,866.68,0.0 +gfx950,256,2748,128,7168,ck,18,1,21.3577,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,236.1,998.17,0.0 +gfx950,256,809,6144,1536,ck,18,0,21.3738,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,714.4,964.77,0.0 +gfx950,256,2747,128,7168,ck,18,1,21.3982,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,235.57,995.94,0.0 +gfx950,256,2570,128,7168,ck,18,1,21.4158,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,220.21,933.76,0.0 +gfx950,256,208,7168,4096,ck,18,0,21.4243,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,570.09,1549.36,0.0 +gfx950,256,2304,128,7168,ck,7,0,21.4341,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,197.25,840.83,0.0 +gfx950,256,2594,128,7168,ck,18,1,21.4718,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,221.69,939.62,0.0 +gfx950,256,2444,128,7168,ck,18,1,21.4787,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,208.8,887.47,0.0 +gfx950,256,2235,128,7168,ck,7,0,21.5066,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,190.7,814.18,0.0 +gfx950,256,816,6144,1536,ck,18,0,21.5119,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,715.95,963.07,0.0 +gfx950,256,216,7168,4096,ck,18,0,21.5318,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,589.06,1548.47,0.0 +gfx950,256,2116,128,7168,ck,7,0,21.5607,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,180.09,771.16,0.0 +gfx950,256,2119,128,7168,ck,7,0,21.5641,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,180.32,772.07,0.0 +gfx950,256,168,7168,4096,ck,18,0,21.6361,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,455.95,1500.12,0.0 +gfx950,256,830,6144,1536,ck,18,0,21.684,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,722.46,964.36,0.0 +gfx950,256,814,6144,1536,ck,18,0,21.7468,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,706.48,951.4,0.0 +gfx950,256,176,7168,4096,ck,18,0,21.7684,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,474.76,1497.77,0.0 +gfx950,256,2636,128,7168,ck,18,1,21.776,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,222.13,940.81,0.0 +gfx950,256,813,6144,1536,ck,18,0,21.7928,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,704.13,948.76,0.0 +gfx950,256,3000,128,7168,ck,7,0,21.8011,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,252.51,1063.69,0.0 +gfx950,256,806,6144,1536,ck,18,0,21.8095,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,697.53,943.59,0.0 +gfx950,256,1594,8192,512,ck,18,0,21.8213,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.77,1426.43,0.0 +gfx950,256,3004,128,7168,ck,18,1,21.8256,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,252.56,1063.85,0.0 +gfx950,256,2843,128,7168,ck,7,0,21.8535,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,238.72,1007.8,0.0 +gfx950,256,2841,128,7168,ck,7,0,21.8778,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,238.29,1006.0,0.0 +gfx950,256,1567,8192,512,ck,18,0,21.9087,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,599.99,1399.92,0.0 +gfx950,256,2593,128,7168,ck,7,0,21.9344,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,216.93,919.47,0.0 +gfx950,256,2740,128,7168,ck,7,0,21.9348,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,229.22,969.2,0.0 +gfx950,256,2757,128,7168,ck,7,0,21.936,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,230.63,974.9,0.0 +gfx950,256,1549,8192,512,ck,18,0,21.9385,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,592.29,1384.15,0.0 +gfx950,256,1570,8192,512,ck,18,0,21.9642,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,599.62,1398.69,0.0 +gfx950,256,1562,8192,512,ck,18,0,21.9693,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,596.42,1392.21,0.0 +gfx950,256,1564,8192,512,ck,18,0,21.9798,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,596.9,1393.08,0.0 +gfx950,256,1613,8192,512,ck,18,0,21.9801,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.59,1430.73,0.0 +gfx950,256,2734,128,7168,ck,7,0,21.9856,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,228.19,964.94,0.0 +gfx950,256,2955,128,7168,ck,7,0,22.0035,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,246.44,1038.72,0.0 +gfx950,256,2947,128,7168,ck,7,0,22.0095,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,245.7,1035.74,0.0 +gfx950,256,2964,128,7168,ck,7,0,22.0109,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,247.1,1041.4,0.0 +gfx950,256,1546,8192,512,ck,18,0,22.0367,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,588.51,1375.68,0.0 +gfx950,256,1654,8192,512,ck,18,0,22.0371,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,629.61,1458.46,0.0 +gfx950,256,2876,128,7168,ck,7,0,22.0384,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,239.47,1010.46,0.0 +gfx950,256,2889,128,7168,ck,7,0,22.042,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,240.51,1014.67,0.0 +gfx950,256,1590,8192,512,ck,18,0,22.0439,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,605.06,1408.96,0.0 +gfx950,256,2729,128,7168,ck,7,0,22.0464,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,227.15,960.59,0.0 +gfx950,256,1627,8192,512,ck,18,0,22.0566,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.78,1436.49,0.0 +gfx950,256,1636,8192,512,ck,18,0,22.0691,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,621.85,1442.57,0.0 +gfx950,256,1611,8192,512,ck,18,0,22.071,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.3,1423.3,0.0 +gfx950,256,3258,128,7168,ck,18,1,22.0715,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,270.87,1137.43,0.0 +gfx950,256,1541,8192,512,ck,18,0,22.0768,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,585.54,1369.36,0.0 +gfx950,256,2946,128,7168,ck,7,0,22.0788,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,244.85,1032.15,0.0 +gfx950,256,1538,8192,512,ck,18,0,22.0845,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,584.2,1366.59,0.0 +gfx950,256,2918,128,7168,ck,7,0,22.0852,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,242.45,1022.44,0.0 +gfx950,256,1615,8192,512,ck,18,0,22.0856,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,613.41,1425.42,0.0 +gfx950,256,1599,8192,512,ck,18,0,22.0945,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,607.09,1412.61,0.0 +gfx950,256,1632,8192,512,ck,18,0,22.0945,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,619.62,1437.85,0.0 +gfx950,256,2952,128,7168,ck,7,0,22.1161,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,244.93,1032.42,0.0 +gfx950,256,1553,8192,512,ck,18,0,22.124,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,588.84,1375.6,0.0 +gfx950,256,1543,8192,512,ck,18,0,22.1352,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,584.75,1367.27,0.0 +gfx950,256,2861,128,7168,ck,7,0,22.1388,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,237.14,1000.85,0.0 +gfx950,256,1571,8192,512,ck,18,0,22.143,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,595.15,1388.16,0.0 +gfx950,256,1572,8192,512,ck,18,0,22.1467,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,595.43,1388.69,0.0 +gfx950,256,3019,128,7168,ck,7,0,22.1706,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,249.88,1052.32,0.0 +gfx950,256,1561,8192,512,ck,18,0,22.171,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,590.62,1378.78,0.0 +gfx950,256,1606,8192,512,ck,18,0,22.1719,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,607.62,1413.02,0.0 +gfx950,256,1580,8192,512,ck,18,0,22.1721,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,597.78,1393.19,0.0 +gfx950,256,1545,8192,512,ck,18,0,22.1808,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,584.31,1365.98,0.0 +gfx950,256,1563,8192,512,ck,18,0,22.1814,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,591.1,1379.66,0.0 +gfx950,256,1589,8192,512,ck,18,0,22.2025,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,600.36,1398.13,0.0 +gfx950,256,1598,8192,512,ck,18,0,22.2044,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,603.71,1404.86,0.0 +gfx950,256,1601,8192,512,ck,18,0,22.2076,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,604.76,1406.94,0.0 +gfx950,256,232,7168,4096,ck,18,0,22.2182,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,613.15,1513.91,0.0 +gfx950,256,3005,128,7168,ck,7,0,22.2184,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,248.18,1045.38,0.0 +gfx950,256,1554,8192,512,ck,18,0,22.2315,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,586.37,1369.71,0.0 +gfx950,256,1591,8192,512,ck,18,0,22.2344,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,600.25,1397.65,0.0 +gfx950,256,1655,8192,512,ck,18,0,22.2413,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.21,1445.83,0.0 +gfx950,256,1652,8192,512,ck,18,0,22.2465,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,622.93,1443.22,0.0 +gfx950,256,1573,8192,512,ck,18,0,22.2497,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,593.05,1383.02,0.0 +gfx950,256,3632,128,7168,ck,7,0,22.2814,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,299.12,1251.33,0.0 +gfx950,256,1607,8192,512,ck,18,0,22.3337,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,603.59,1403.54,0.0 +gfx950,256,3587,128,7168,ck,7,0,22.3606,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,294.36,1231.96,0.0 +gfx950,256,1659,8192,512,ck,18,0,22.362,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,622.34,1441.05,0.0 +gfx950,256,1626,8192,512,ck,18,0,22.3686,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,609.78,1415.7,0.0 +gfx950,256,1608,8192,512,ck,18,0,22.3838,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,602.62,1401.15,0.0 +gfx950,256,1568,8192,512,ck,18,0,22.3956,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,587.32,1370.23,0.0 +gfx950,256,184,7168,4096,ck,18,0,22.4018,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,482.31,1462.01,0.0 +gfx950,256,1660,8192,512,ck,18,0,22.4104,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,621.37,1438.69,0.0 +gfx950,256,1648,8192,512,ck,18,0,22.4548,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.66,1426.82,0.0 +gfx950,256,1649,8192,512,ck,18,0,22.458,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.94,1427.37,0.0 +gfx950,256,1647,8192,512,ck,18,0,22.4609,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.12,1425.68,0.0 +gfx950,256,3840,128,7168,ck,7,0,22.4731,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,313.55,1309.37,0.0 +gfx950,256,1609,8192,512,ck,18,0,22.4748,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,600.55,1396.23,0.0 +gfx950,256,1653,8192,512,ck,18,0,22.501,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,616.26,1427.64,0.0 +gfx950,256,1640,8192,512,ck,18,0,22.5324,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,610.56,1415.91,0.0 +gfx950,256,1631,8192,512,ck,18,0,22.5448,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,606.87,1408.38,0.0 +gfx950,256,240,7168,4096,ck,18,0,22.5571,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.76,1497.7,0.0 +gfx950,256,1646,8192,512,ck,18,0,22.568,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,611.82,1418.16,0.0 +gfx950,256,3620,128,7168,ck,7,0,22.5757,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,294.24,1231.08,0.0 +gfx950,256,3451,128,7168,ck,7,0,22.5909,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,280.32,1174.71,0.0 +gfx950,256,3617,128,7168,ck,7,0,22.6041,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,293.63,1228.54,0.0 +gfx950,256,3824,128,7168,ck,7,0,22.6106,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,310.34,1296.16,0.0 +gfx950,256,3831,128,7168,ck,7,0,22.6164,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,310.83,1298.12,0.0 +gfx950,256,4000,128,7168,ck,7,0,22.6338,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,324.3,1352.56,0.0 +gfx950,256,1658,8192,512,ck,18,0,22.6642,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,613.67,1421.09,0.0 +gfx950,256,3655,128,7168,ck,7,0,22.7267,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,295.11,1234.33,0.0 +gfx950,256,144,7168,4608,ck,18,0,22.742,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,418.29,1572.34,0.0 +gfx950,256,136,7168,4608,ck,18,0,22.745,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,395.0,1565.47,0.0 +gfx950,256,3749,128,7168,ck,7,0,22.758,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,302.29,1263.3,0.0 +gfx950,256,4002,128,7168,ck,7,0,22.7781,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,322.4,1344.64,0.0 +gfx950,256,200,7168,4608,ck,18,0,22.7803,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,579.98,1616.26,0.0 +gfx950,256,3894,128,7168,ck,7,0,22.819,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,313.14,1307.09,0.0 +gfx950,256,3971,128,7168,ck,7,0,22.8352,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,319.1,1331.2,0.0 +gfx950,256,3925,128,7168,ck,7,0,22.9206,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,314.23,1311.34,0.0 +gfx950,256,4050,128,7168,ck,7,0,22.9365,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,324.02,1350.89,0.0 +gfx950,256,248,7168,4096,ck,18,0,22.945,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,634.68,1478.81,0.0 +gfx950,256,152,7168,4608,ck,18,0,22.9751,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,437.05,1562.98,0.0 +gfx950,256,1664,8192,512,ck,18,0,23.0137,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,606.54,1403.91,0.0 +gfx950,256,208,7168,4608,ck,18,0,23.0225,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,596.83,1605.84,0.0 +gfx950,256,4046,128,7168,ck,7,0,23.0582,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_1x2_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,321.99,1342.47,0.0 +gfx950,256,1721,8192,512,ck,18,0,23.0798,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,625.52,1441.62,0.0 +gfx950,256,1709,8192,512,ck,18,0,23.1386,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,619.58,1429.19,0.0 +gfx950,256,168,7168,4608,ck,18,0,23.1605,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,479.18,1563.56,0.0 +gfx950,256,1680,8192,512,ck,18,0,23.1618,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,608.45,1406.61,0.0 +gfx950,256,1693,8192,512,ck,18,0,23.1816,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.64,1414.88,0.0 +gfx950,256,1694,8192,512,ck,18,0,23.1929,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.7,1414.92,0.0 +gfx950,256,1667,8192,512,ck,18,0,23.2127,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,602.42,1394.06,0.0 +gfx950,256,1722,8192,512,ck,18,0,23.2537,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,621.2,1431.57,0.0 +gfx950,256,1706,8192,512,ck,18,0,23.2667,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.08,1419.15,0.0 +gfx950,256,1690,8192,512,ck,18,0,23.2938,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,608.61,1405.89,0.0 +gfx950,256,1714,8192,512,ck,18,0,23.2964,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,617.18,1423.14,0.0 +gfx950,256,1701,8192,512,ck,18,0,23.2971,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.48,1413.67,0.0 +gfx950,256,24,9216,7168,ck,8,2,23.3048,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_1x1_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,136.06,2860.99,0.0 +gfx950,256,1675,8192,512,ck,18,0,23.307,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,602.86,1394.22,0.0 +gfx950,256,216,7168,4608,ck,18,0,23.318,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,611.93,1591.99,0.0 +gfx950,256,1711,8192,512,ck,18,0,23.3508,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,614.66,1417.65,0.0 +gfx950,256,1692,8192,512,ck,18,0,23.358,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,607.65,1403.47,0.0 +gfx950,256,176,7168,4608,ck,18,0,23.361,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,497.69,1556.62,0.0 +gfx950,256,1716,8192,512,ck,18,0,23.3639,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,616.12,1420.48,0.0 +gfx950,256,1735,8192,512,ck,18,0,23.3739,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,622.67,1433.6,0.0 +gfx950,256,1739,8192,512,ck,18,0,23.3862,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,623.78,1435.74,0.0 +gfx950,256,1783,8192,512,ck,18,0,23.3879,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,639.51,1467.42,0.0 +gfx950,256,1689,8192,512,ck,18,0,23.4078,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,605.28,1398.32,0.0 +gfx950,256,1703,8192,512,ck,18,0,23.4096,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,610.25,1408.32,0.0 +gfx950,256,1697,8192,512,ck,18,0,23.4109,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,608.07,1403.91,0.0 +gfx950,256,1726,8192,512,ck,18,0,23.4111,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.46,1424.83,0.0 +gfx950,256,4123,128,7168,ck,18,0,23.4177,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,323.08,1346.27,0.0 +gfx950,256,1756,8192,512,ck,18,0,23.4223,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,628.9,1445.79,0.0 +gfx950,256,1695,8192,512,ck,18,0,23.4477,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,606.4,1400.27,0.0 +gfx950,256,1782,8192,512,ck,18,0,23.4481,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,637.51,1462.93,0.0 +gfx950,256,1743,8192,512,ck,18,0,23.4552,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,623.37,1434.4,0.0 +gfx950,256,4598,128,7168,ck,18,0,23.4574,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,359.69,1494.33,0.0 +gfx950,256,1752,8192,512,ck,18,0,23.4597,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,626.47,1440.6,0.0 +gfx950,256,1719,8192,512,ck,18,0,23.4641,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,614.56,1416.57,0.0 +gfx950,256,5440,128,7168,ck,18,0,23.4678,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,425.37,1760.03,0.0 +gfx950,256,4176,128,7168,ck,18,0,23.4802,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,326.36,1359.45,0.0 +gfx950,256,4294,128,7168,ck,18,0,23.4828,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,335.54,1396.6,0.0 +gfx950,256,4522,128,7168,ck,18,0,23.4893,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,353.26,1468.28,0.0 +gfx950,256,1727,8192,512,ck,18,0,23.4979,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,616.53,1420.28,0.0 +gfx950,256,4180,128,7168,ck,18,0,23.4988,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,326.41,1359.64,0.0 +gfx950,256,4507,128,7168,ck,18,0,23.5333,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,351.43,1460.8,0.0 +gfx950,256,1708,8192,512,ck,18,0,23.5364,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,608.75,1404.32,0.0 +gfx950,256,1786,8192,512,ck,18,0,23.5443,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,636.33,1459.83,0.0 +gfx950,256,1738,8192,512,ck,18,0,23.5459,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,619.19,1425.28,0.0 +gfx950,256,1702,8192,512,ck,18,0,23.5556,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,606.12,1398.87,0.0 +gfx950,256,4504,128,7168,ck,18,0,23.5655,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,350.72,1457.86,0.0 +gfx950,256,1698,8192,512,ck,18,0,23.5689,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,604.35,1395.22,0.0 +gfx950,256,1677,8192,512,ck,18,0,23.5712,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,596.82,1380.03,0.0 +gfx950,256,1742,8192,512,ck,18,0,23.5943,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,619.34,1425.22,0.0 +gfx950,256,1717,8192,512,ck,18,0,23.5972,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,610.38,1407.15,0.0 +gfx950,256,1731,8192,512,ck,18,0,23.6204,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,614.75,1415.78,0.0 +gfx950,256,1733,8192,512,ck,18,0,23.6212,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.44,1417.16,0.0 +gfx950,256,1688,8192,512,ck,18,0,23.6227,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,599.42,1384.89,0.0 +gfx950,256,5003,128,7168,ck,18,0,23.6281,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,388.54,1610.78,0.0 +gfx950,256,1744,8192,512,ck,18,0,23.6607,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.31,1422.65,0.0 +gfx950,256,1753,8192,512,ck,18,0,23.6683,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,621.3,1428.62,0.0 +gfx950,256,5004,128,7168,ck,18,0,23.6783,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,387.8,1607.68,0.0 +gfx950,256,5018,128,7168,ck,18,0,23.681,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,388.84,1611.89,0.0 +gfx950,256,1749,8192,512,ck,18,0,23.6833,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,619.49,1424.86,0.0 +gfx950,256,1737,8192,512,ck,18,0,23.6896,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.08,1415.92,0.0 +gfx950,256,232,7168,4608,ck,18,0,23.6956,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,646.79,1579.41,0.0 +gfx950,256,240,7168,4608,ck,18,0,23.7008,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,668.94,1585.46,0.0 +gfx950,256,5006,128,7168,ck,18,0,23.7044,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,387.53,1606.54,0.0 +gfx950,256,4898,128,7168,ck,18,0,23.7065,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,379.13,1572.58,0.0 +gfx950,256,5002,128,7168,ck,18,0,23.7098,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,387.13,1604.92,0.0 +gfx950,256,184,7168,4608,ck,18,0,23.7109,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,512.64,1540.04,0.0 +gfx950,256,5000,128,7168,ck,18,0,23.7126,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,386.93,1604.11,0.0 +gfx950,256,5126,128,7168,ck,18,0,23.7165,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,396.61,1643.28,0.0 +gfx950,256,1724,8192,512,ck,18,0,23.7172,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,609.77,1405.01,0.0 +gfx950,256,1747,8192,512,ck,18,0,23.7217,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,617.78,1421.13,0.0 +gfx950,256,5124,128,7168,ck,18,0,23.728,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,396.27,1641.86,0.0 +gfx950,256,1765,8192,512,ck,18,0,23.7461,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,623.51,1432.48,0.0 +gfx950,256,5007,128,7168,ck,18,0,23.7513,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,386.84,1603.68,0.0 +gfx950,256,5272,128,7168,ck,18,0,23.7649,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,407.08,1685.55,0.0 +gfx950,256,1732,8192,512,ck,18,0,23.7665,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,611.33,1407.79,0.0 +gfx950,256,5273,128,7168,ck,18,0,23.7671,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,407.12,1685.7,0.0 +gfx950,256,1736,8192,512,ck,18,0,23.7682,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.69,1410.53,0.0 +gfx950,256,5269,128,7168,ck,18,0,23.7728,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,406.71,1684.05,0.0 +gfx950,256,5295,128,7168,ck,18,0,23.779,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,408.61,1691.73,0.0 +gfx950,256,1754,8192,512,ck,18,0,23.7845,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.62,1422.35,0.0 +gfx950,256,5277,128,7168,ck,18,0,23.7847,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,407.12,1685.7,0.0 +gfx950,256,5059,128,7168,ck,18,0,23.7849,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,390.3,1617.64,0.0 +gfx950,256,1745,8192,512,ck,18,0,23.786,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.41,1415.87,0.0 +gfx950,256,5281,128,7168,ck,18,0,23.7871,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,407.39,1686.78,0.0 +gfx950,256,1758,8192,512,ck,18,0,23.7968,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,619.71,1424.46,0.0 +gfx950,256,5276,128,7168,ck,18,0,23.808,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,406.65,1683.74,0.0 +gfx950,256,5285,128,7168,ck,18,0,23.8124,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,407.27,1686.24,0.0 +gfx950,256,5489,128,7168,ck,18,0,23.819,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,422.87,1749.35,0.0 +gfx950,256,5251,128,7168,ck,18,0,23.8265,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,404.41,1674.64,0.0 +gfx950,256,1740,8192,512,ck,18,0,23.8335,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.42,1409.5,0.0 +gfx950,256,1741,8192,512,ck,18,0,23.8473,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.42,1409.39,0.0 +gfx950,256,5368,128,7168,ck,18,0,23.8601,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,412.84,1708.69,0.0 +gfx950,256,40,9216,7168,ck,18,0,23.8618,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,221.48,2811.37,0.0 +gfx950,256,5302,128,7168,ck,18,0,23.8636,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,407.7,1687.91,0.0 +gfx950,256,5431,128,7168,ck,18,0,23.8641,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,417.61,1728.0,0.0 +gfx950,256,5264,128,7168,ck,18,0,23.8674,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,404.71,1675.82,0.0 +gfx950,256,856,6144,1536,ck,0,0,23.8684,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,676.9,891.16,0.0 +gfx950,256,5275,128,7168,ck,18,0,23.8699,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,405.52,1679.06,0.0 +gfx950,256,1757,8192,512,ck,18,0,23.8708,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,617.44,1419.33,0.0 +gfx950,256,5283,128,7168,ck,18,0,23.8726,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,406.09,1681.36,0.0 +gfx950,256,1778,8192,512,ck,18,0,23.8777,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.64,1433.78,0.0 +gfx950,256,1773,8192,512,ck,18,0,23.8813,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,622.79,1430.03,0.0 +gfx950,256,5292,128,7168,ck,18,0,23.8918,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,406.45,1682.81,0.0 +gfx950,256,877,6144,1536,ck,0,0,23.8941,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.76,902.35,0.0 +gfx950,256,1790,8192,512,ck,18,0,23.912,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,627.95,1440.2,0.0 +gfx950,256,5258,128,7168,ck,18,0,23.9228,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,403.32,1670.08,0.0 +gfx950,256,5540,128,7168,ck,18,0,23.9436,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,424.58,1756.06,0.0 +gfx950,256,5447,128,7168,ck,18,0,23.9483,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,417.37,1726.89,0.0 +gfx950,256,1788,8192,512,ck,18,0,23.9509,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,626.23,1436.45,0.0 +gfx950,256,1767,8192,512,ck,18,0,23.9524,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.84,1421.55,0.0 +gfx950,256,869,6144,1536,ck,0,0,23.9728,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.18,894.77,0.0 +gfx950,256,1755,8192,512,ck,18,0,23.9819,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,613.88,1411.35,0.0 +gfx950,256,849,6144,1536,ck,0,0,23.9863,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,668.06,882.74,0.0 +gfx950,256,6001,128,7168,ck,18,0,23.9985,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,458.86,1894.66,0.0 +gfx950,256,1789,8192,512,ck,18,0,24.0016,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,625.26,1434.12,0.0 +gfx950,256,1781,8192,512,ck,18,0,24.0285,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,621.77,1426.89,0.0 +gfx950,256,5670,128,7168,ck,18,0,24.0342,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,432.9,1789.6,0.0 +gfx950,256,6107,128,7168,ck,18,0,24.0409,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,466.14,1924.05,0.0 +gfx950,256,6108,128,7168,ck,18,0,24.0632,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,465.78,1922.57,0.0 +gfx950,256,6008,128,7168,ck,18,0,24.0669,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,458.09,1891.43,0.0 +gfx950,256,1776,8192,512,ck,18,0,24.072,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.9,1420.8,0.0 +gfx950,256,6026,128,7168,ck,18,0,24.0872,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,459.07,1895.39,0.0 +gfx950,256,6002,128,7168,ck,18,0,24.0936,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,457.12,1887.49,0.0 +gfx950,256,5890,128,7168,ck,18,0,24.1144,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,448.21,1851.38,0.0 +gfx950,256,48,9216,7168,ck,18,0,24.1689,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,262.39,2784.12,0.0 +gfx950,256,902,6144,1536,ck,0,0,24.1813,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.04,905.92,0.0 +gfx950,256,6225,128,7168,ck,18,0,24.2455,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,471.14,1943.94,0.0 +gfx950,256,1750,8192,512,ck,18,0,24.2518,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,605.32,1392.16,0.0 +gfx950,256,910,6144,1536,ck,0,0,24.2772,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.48,906.9,0.0 +gfx950,256,1809,8192,512,ck,18,0,24.2937,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.65,1430.79,0.0 +gfx950,256,6146,128,7168,ck,18,0,24.3019,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,464.08,1915.3,0.0 +gfx950,256,56,9216,7168,ck,18,0,24.303,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,304.44,2777.18,0.0 +gfx950,256,920,6144,1536,ck,0,0,24.312,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.23,911.29,0.0 +gfx950,256,6304,128,7168,ck,18,0,24.3201,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,475.65,1962.1,0.0 +gfx950,256,926,6144,1536,ck,0,0,24.3205,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,718.64,914.38,0.0 +gfx950,256,1793,8192,512,ck,18,0,24.3211,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.42,1418.06,0.0 +gfx950,256,1841,8192,512,ck,18,0,24.3416,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,634.45,1450.19,0.0 +gfx950,256,6476,128,7168,ck,18,0,24.3427,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,488.18,2012.73,0.0 +gfx950,256,6418,128,7168,ck,18,0,24.3579,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,483.5,1993.8,0.0 +gfx950,256,1851,8192,512,ck,18,0,24.3673,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,637.22,1455.59,0.0 +gfx950,256,6307,128,7168,ck,18,0,24.3932,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,474.45,1957.13,0.0 +gfx950,256,6449,128,7168,ck,18,0,24.397,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,485.06,2000.04,0.0 +gfx950,256,248,7168,4608,ck,18,0,24.3971,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,671.51,1546.42,0.0 +gfx950,256,962,6144,1536,ck,0,0,24.4257,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.36,930.82,0.0 +gfx950,256,6583,128,7168,ck,18,0,24.4584,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,493.89,2035.69,0.0 +gfx950,256,1840,8192,512,ck,18,0,24.4841,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,630.41,1441.06,0.0 +gfx950,256,1833,8192,512,ck,18,0,24.4905,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,627.85,1435.85,0.0 +gfx950,256,1812,8192,512,ck,18,0,24.4946,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,620.55,1421.12,0.0 +gfx950,256,930,6144,1536,ck,0,0,24.5334,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,715.48,908.7,0.0 +gfx950,256,734,8192,1536,ck,0,0,24.538,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.78,1048.83,0.0 +gfx950,256,6811,128,7168,ck,18,0,24.5405,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,509.29,2097.85,0.0 +gfx950,256,6819,128,7168,ck,18,0,24.5438,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,509.82,2099.99,0.0 +gfx950,256,6809,128,7168,ck,18,0,24.5549,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,508.84,2096.02,0.0 +gfx950,256,6812,128,7168,ck,18,0,24.5549,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,509.07,2096.93,0.0 +gfx950,256,1844,8192,512,ck,18,0,24.5681,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,629.62,1438.88,0.0 +gfx950,256,7000,128,7168,ck,18,0,24.5745,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,522.7,2152.05,0.0 +gfx950,256,1001,6144,1536,ck,0,0,24.582,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.58,946.83,0.0 +gfx950,256,6815,128,7168,ck,18,0,24.5935,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,508.49,2094.54,0.0 +gfx950,256,6817,128,7168,ck,18,0,24.5958,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,508.59,2094.95,0.0 +gfx950,256,1015,6144,1536,ck,0,0,24.6,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.76,954.01,0.0 +gfx950,256,1828,8192,512,ck,18,0,24.6146,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,622.98,1425.18,0.0 +gfx950,256,6818,128,7168,ck,18,0,24.6216,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,508.13,2093.05,0.0 +gfx950,256,1849,8192,512,ck,18,0,24.6227,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,629.93,1439.12,0.0 +gfx950,256,1007,6144,1536,ck,0,0,24.6275,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.76,948.45,0.0 +gfx950,256,7001,128,7168,ck,18,0,24.6311,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,521.57,2147.4,0.0 +gfx950,256,1814,8192,512,ck,18,0,24.6328,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,617.75,1414.52,0.0 +gfx950,256,6813,128,7168,ck,18,0,24.6366,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,507.45,2090.27,0.0 +gfx950,256,1000,6144,1536,ck,0,0,24.6384,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.05,944.1,0.0 +gfx950,256,1009,6144,1536,ck,0,0,24.6439,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.78,948.94,0.0 +gfx950,256,6814,128,7168,ck,18,0,24.6467,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,507.32,2089.72,0.0 +gfx950,256,1797,8192,512,ck,18,0,24.6622,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,611.23,1401.19,0.0 +gfx950,256,1813,8192,512,ck,18,0,24.6699,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,616.48,1411.71,0.0 +gfx950,256,7289,128,7168,ck,18,0,24.707,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,541.36,2227.35,0.0 +gfx950,256,1023,6144,1536,ck,0,0,24.7104,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.39,954.22,0.0 +gfx950,256,1040,6144,1536,ck,0,0,24.7107,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.37,963.72,0.0 +gfx950,256,1799,8192,512,ck,18,0,24.7215,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,610.44,1399.2,0.0 +gfx950,256,7440,128,7168,ck,18,0,24.7219,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,552.24,2271.35,0.0 +gfx950,256,1798,8192,512,ck,18,0,24.7268,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,609.97,1398.21,0.0 +gfx950,256,1054,6144,1536,ck,0,0,24.7563,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.58,969.76,0.0 +gfx950,256,1063,6144,1536,ck,0,0,24.7596,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.33,974.66,0.0 +gfx950,256,7009,128,7168,ck,18,0,24.787,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,518.88,2136.29,0.0 +gfx950,256,1003,6144,1536,ck,0,0,24.7954,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.49,939.8,0.0 +gfx950,256,7050,128,7168,ck,18,0,24.8062,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,521.52,2146.91,0.0 +gfx950,256,1802,8192,512,ck,18,0,24.8244,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,608.93,1395.44,0.0 +gfx950,256,7238,128,7168,ck,18,0,24.8556,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,534.36,2198.8,0.0 +gfx950,256,814,8192,1536,ck,0,0,24.8628,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.92,1092.79,0.0 +gfx950,256,1808,8192,512,ck,18,0,24.8792,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,609.61,1396.44,0.0 +gfx950,256,7539,128,7168,ck,18,0,24.8897,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,555.82,2285.57,0.0 +gfx950,256,7242,128,7168,ck,18,0,24.8907,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,533.9,2196.89,0.0 +gfx950,256,7241,128,7168,ck,18,0,24.921,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,533.18,2193.92,0.0 +gfx950,256,1805,8192,512,ck,18,0,24.9468,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,606.95,1390.62,0.0 +gfx950,256,1047,6144,1536,ck,0,0,24.9472,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.13,958.46,0.0 +gfx950,256,1839,8192,512,ck,18,0,24.9624,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.0,1412.77,0.0 +gfx950,256,1087,6144,1536,ck,0,0,24.9744,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.5,979.56,0.0 +gfx950,256,1855,8192,512,ck,18,0,24.9792,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,622.95,1422.64,0.0 +gfx950,256,7536,128,7168,ck,18,0,24.9917,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,553.33,2275.35,0.0 +gfx950,256,1848,8192,512,ck,18,0,24.9967,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,620.17,1416.91,0.0 +gfx950,256,7656,128,7168,ck,18,0,24.9972,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,562.02,2310.48,0.0 +gfx950,256,1837,8192,512,ck,18,0,25.0132,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,616.07,1408.55,0.0 +gfx950,256,7990,128,7168,ck,18,0,25.0458,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,585.4,2405.0,0.0 +gfx950,256,806,8192,1536,ck,0,0,25.0472,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,809.82,1079.02,0.0 +gfx950,256,8001,128,7168,ck,18,0,25.0547,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,585.99,2407.41,0.0 +gfx950,256,813,8192,1536,ck,0,0,25.0603,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.42,1083.46,0.0 +gfx950,256,8051,128,7168,ck,18,0,25.0608,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,589.51,2421.64,0.0 +gfx950,256,1881,8192,512,ck,18,0,25.0692,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,629.42,1435.05,0.0 +gfx950,256,1807,8192,512,ck,18,0,25.0965,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,604.0,1383.67,0.0 +gfx950,256,1918,8192,512,ck,18,0,25.1018,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,640.96,1458.1,0.0 +gfx950,256,1868,8192,512,ck,18,0,25.1073,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.12,1424.13,0.0 +gfx950,256,8007,128,7168,ck,18,0,25.1141,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,585.05,2403.49,0.0 +gfx950,256,1903,8192,512,ck,18,0,25.1142,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,635.64,1447.28,0.0 +gfx950,256,8008,128,7168,ck,18,0,25.1145,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,585.11,2403.75,0.0 +gfx950,256,1836,8192,512,ck,18,0,25.1184,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,613.16,1401.97,0.0 +gfx950,256,8047,128,7168,ck,18,0,25.1219,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,587.79,2414.56,0.0 +gfx950,256,1891,8192,512,ck,18,0,25.1409,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,630.96,1437.68,0.0 +gfx950,256,8010,128,7168,ck,18,0,25.1431,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,584.59,2401.6,0.0 +gfx950,256,7871,128,7168,ck,18,0,25.1434,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,574.44,2360.53,0.0 +gfx950,256,1888,8192,512,ck,18,0,25.1439,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,629.88,1435.5,0.0 +gfx950,256,1871,8192,512,ck,18,0,25.1466,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.14,1423.92,0.0 +gfx950,256,1144,6144,1536,ck,0,0,25.1482,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,858.6,1004.12,0.0 +gfx950,256,1870,8192,512,ck,18,0,25.1692,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,623.25,1421.97,0.0 +gfx950,256,1860,8192,512,ck,18,0,25.1716,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,619.86,1415.12,0.0 +gfx950,256,830,8192,1536,ck,0,0,25.1735,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.75,1090.69,0.0 +gfx950,256,8015,128,7168,ck,18,0,25.1786,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,584.13,2399.69,0.0 +gfx950,256,8182,128,7168,ck,18,0,25.204,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,595.7,2446.46,0.0 +gfx950,256,1794,8192,512,ck,18,0,25.2156,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,596.82,1368.43,0.0 +gfx950,256,8043,128,7168,ck,18,0,25.2255,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,585.08,2403.47,0.0 +gfx950,256,8018,128,7168,ck,18,0,25.2267,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,583.23,2396.0,0.0 +gfx950,256,1867,8192,512,ck,18,0,25.2305,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,620.74,1416.51,0.0 +gfx950,256,7809,128,7168,ck,18,0,25.2328,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,567.89,2333.93,0.0 +gfx950,256,869,8192,1536,ck,0,0,25.2446,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,866.29,1115.3,0.0 +gfx950,256,1112,6144,1536,ck,0,0,25.2458,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.36,982.72,0.0 +gfx950,256,809,8192,1536,ck,0,0,25.2509,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.27,1072.44,0.0 +gfx950,256,8090,128,7168,ck,18,0,25.2774,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,587.29,2412.34,0.0 +gfx950,256,1132,6144,1536,ck,0,0,25.2813,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,845.12,992.27,0.0 +gfx950,256,1856,8192,512,ck,18,0,25.291,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.6,1405.77,0.0 +gfx950,256,856,8192,1536,ck,0,0,25.2956,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,851.61,1103.85,0.0 +gfx950,256,1910,8192,512,ck,18,0,25.3294,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,632.56,1439.66,0.0 +gfx950,256,8048,128,7168,ck,18,0,25.3323,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,582.98,2394.8,0.0 +gfx950,256,849,8192,1536,ck,0,0,25.334,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,843.36,1097.22,0.0 +gfx950,256,1899,8192,512,ck,18,0,25.3552,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,628.27,1430.86,0.0 +gfx950,256,8040,128,7168,ck,18,0,25.3597,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,581.77,2389.87,0.0 +gfx950,256,1885,8192,512,ck,18,0,25.3629,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,623.45,1421.1,0.0 +gfx950,256,1024,6144,1536,ck,2,0,25.3633,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_1x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.02,930.2,0.0 +gfx950,256,1863,8192,512,ck,18,0,25.3657,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,616.11,1406.29,0.0 +gfx950,256,1898,8192,512,ck,18,0,25.4804,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.86,1423.17,0.0 +gfx950,256,1878,8192,512,ck,18,0,25.4913,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.01,1409.3,0.0 +gfx950,256,1861,8192,512,ck,18,0,25.4954,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.31,1397.81,0.0 +gfx950,256,1895,8192,512,ck,18,0,25.5015,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,623.35,1420.0,0.0 +gfx950,256,1889,8192,512,ck,18,0,25.5087,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,621.2,1415.63,0.0 +gfx950,256,1858,8192,512,ck,18,0,25.5315,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,610.46,1393.85,0.0 +gfx950,256,1920,8192,512,ck,0,0,25.5316,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,630.83,1434.87,0.0 +gfx950,256,1900,8192,512,ck,18,0,25.5472,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,623.88,1420.77,0.0 +gfx950,256,877,8192,1536,ck,0,0,25.5813,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,862.76,1106.23,0.0 +gfx950,256,1912,8192,512,ck,18,0,25.5972,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,626.59,1425.92,0.0 +gfx950,256,1185,6144,1536,ck,0,0,25.5997,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,873.69,1008.55,0.0 +gfx950,256,1265,6144,1536,ck,0,0,25.6036,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,932.53,1051.59,0.0 +gfx950,256,1218,6144,1536,ck,0,0,25.6067,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,897.77,1026.09,0.0 +gfx950,256,1908,8192,512,ck,18,0,25.6228,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.66,1421.85,0.0 +gfx950,256,1896,8192,512,ck,0,0,25.6964,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,618.95,1409.89,0.0 +gfx950,256,1901,8192,512,ck,18,0,25.6968,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,620.57,1413.16,0.0 +gfx950,256,1255,6144,1536,ck,0,0,25.7008,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,921.66,1042.24,0.0 +gfx950,256,72,9216,7168,ck,12,0,25.8224,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,368.39,2629.64,0.0 +gfx950,256,1270,6144,1536,ck,0,0,25.8804,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,926.2,1043.02,0.0 +gfx950,256,902,8192,1536,ck,0,0,25.8899,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,876.77,1110.35,0.0 +gfx950,256,80,9216,7168,ck,12,0,25.913,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,407.89,2628.34,0.0 +gfx950,256,1946,8192,512,ck,18,0,25.9824,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,628.28,1426.89,0.0 +gfx950,256,930,8192,1536,ck,0,0,26.0915,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,897.01,1121.0,0.0 +gfx950,256,88,9216,7168,ck,12,0,26.1576,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,444.48,2611.6,0.0 +gfx950,256,920,8192,1536,ck,0,0,26.1625,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,884.95,1111.11,0.0 +gfx950,256,1000,8192,1536,ck,0,0,26.178,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,961.33,1165.21,0.0 +gfx950,256,910,8192,1536,ck,0,0,26.2249,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,873.25,1101.63,0.0 +gfx950,256,1940,8192,512,ck,18,0,26.2332,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,620.36,1409.38,0.0 +gfx950,256,1928,8192,512,ck,18,0,26.2582,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.93,1400.32,0.0 +gfx950,256,1932,8192,512,ck,18,0,26.2813,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,616.67,1401.66,0.0 +gfx950,256,1001,8192,1536,ck,0,0,26.3135,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,957.34,1159.89,0.0 +gfx950,256,1923,8192,512,ck,0,0,26.3336,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,612.57,1393.1,0.0 +gfx950,256,1933,8192,512,ck,0,0,26.3337,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,615.76,1399.51,0.0 +gfx950,256,1007,8192,1536,ck,0,0,26.341,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,962.07,1162.76,0.0 +gfx950,256,1925,8192,512,ck,0,0,26.3441,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,612.97,1393.83,0.0 +gfx950,256,1009,8192,1536,ck,0,0,26.3527,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,963.56,1163.61,0.0 +gfx950,256,1926,8192,512,ck,0,0,26.373,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,612.61,1392.94,0.0 +gfx950,256,1015,8192,1536,ck,0,0,26.3778,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,968.36,1166.58,0.0 +gfx950,256,1944,8192,512,ck,18,0,26.3819,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.13,1404.0,0.0 +gfx950,256,1968,8192,512,ck,18,0,26.3948,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,625.46,1418.67,0.0 +gfx950,256,1951,8192,512,ck,18,0,26.3956,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,620.03,1407.75,0.0 +gfx950,256,1003,8192,1536,ck,0,0,26.403,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,956.0,1157.32,0.0 +gfx950,256,1945,8192,512,ck,18,0,26.4092,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,617.81,1403.19,0.0 +gfx950,256,1938,8192,512,ck,0,0,26.4252,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,615.21,1397.86,0.0 +gfx950,256,1935,8192,512,ck,18,0,26.4271,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,614.22,1395.84,0.0 +gfx950,256,1964,8192,512,ck,18,0,26.4283,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,623.39,1414.32,0.0 +gfx950,256,1958,8192,512,ck,0,0,26.4384,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,621.25,1409.94,0.0 +gfx950,256,1937,8192,512,ck,18,0,26.45,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,614.32,1395.91,0.0 +gfx950,256,1927,8192,512,ck,18,0,26.4711,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,610.66,1388.42,0.0 +gfx950,256,1967,8192,512,ck,0,0,26.4776,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,623.18,1413.6,0.0 +gfx950,256,1965,8192,512,ck,0,0,26.4814,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,622.46,1412.12,0.0 +gfx950,256,1956,8192,512,ck,18,0,26.49,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,619.41,1405.92,0.0 +gfx950,256,1953,8192,512,ck,0,0,26.4938,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,618.37,1403.81,0.0 +gfx950,256,1921,8192,512,ck,0,0,26.4947,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,608.22,1383.35,0.0 +gfx950,256,1971,8192,512,ck,0,0,26.5029,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,623.85,1414.8,0.0 +gfx950,256,1966,8192,512,ck,18,0,26.504,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,622.25,1411.55,0.0 +gfx950,256,1023,8192,1536,ck,0,0,26.5102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,971.12,1166.16,0.0 +gfx950,256,1962,8192,512,ck,0,0,26.5254,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,620.48,1407.87,0.0 +gfx950,256,1955,8192,512,ck,18,0,26.5372,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,617.99,1402.78,0.0 +gfx950,256,1949,8192,512,ck,0,0,26.5499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,615.8,1398.3,0.0 +gfx950,256,1960,8192,512,ck,0,0,26.5528,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,619.21,1405.14,0.0 +gfx950,256,1990,8192,512,ck,0,0,26.5638,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,628.42,1423.64,0.0 +gfx950,256,2003,8192,512,ck,0,0,26.5799,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,632.15,1431.04,0.0 +gfx950,256,1987,8192,512,ck,0,0,26.5971,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,626.69,1419.95,0.0 +gfx950,256,1972,8192,512,ck,18,0,26.6033,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,621.82,1410.1,0.0 +gfx950,256,1957,8192,512,ck,0,0,26.6138,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,616.84,1400.02,0.0 +gfx950,256,1996,8192,512,ck,0,0,26.6244,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,628.88,1424.21,0.0 +gfx950,256,1988,8192,512,ck,0,0,26.6324,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,626.18,1418.71,0.0 +gfx950,256,1992,8192,512,ck,0,0,26.6332,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,627.42,1421.2,0.0 +gfx950,256,1963,8192,512,ck,0,0,26.6433,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,618.05,1402.27,0.0 +gfx950,256,2000,8192,512,ck,0,0,26.6455,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,629.65,1425.62,0.0 +gfx950,256,1989,8192,512,ck,0,0,26.6531,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,626.0,1418.24,0.0 +gfx950,256,2014,8192,512,ck,0,0,26.6569,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,633.78,1433.88,0.0 +gfx950,256,2006,8192,512,ck,0,0,26.6571,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,631.26,1428.8,0.0 +gfx950,256,1985,8192,512,ck,0,0,26.6821,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,624.07,1414.16,0.0 +gfx950,256,2004,8192,512,ck,0,0,26.6838,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,630.0,1426.1,0.0 +gfx950,256,1999,8192,512,ck,0,0,26.6952,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,628.16,1422.33,0.0 +gfx950,256,2020,8192,512,ck,0,0,26.7042,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,634.54,1435.14,0.0 +gfx950,256,2008,8192,512,ck,0,0,26.7087,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,630.67,1427.31,0.0 +gfx950,256,1982,8192,512,ck,0,0,26.7251,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,622.12,1409.99,0.0 +gfx950,256,1993,8192,512,ck,0,0,26.7337,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,625.37,1416.49,0.0 +gfx950,256,1991,8192,512,ck,0,0,26.7406,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,624.58,1414.86,0.0 +gfx950,256,1994,8192,512,ck,0,0,26.769,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,624.86,1415.25,0.0 +gfx950,256,2001,8192,512,ck,0,0,26.783,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,626.73,1418.93,0.0 +gfx950,256,2034,8192,512,ck,0,0,26.7889,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,636.92,1439.43,0.0 +gfx950,256,2044,8192,512,ck,0,0,26.8476,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,638.65,1442.58,0.0 +gfx950,256,2007,8192,512,ck,0,0,26.8485,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,627.07,1419.24,0.0 +gfx950,256,2012,8192,512,ck,0,0,26.869,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,628.15,1421.31,0.0 +gfx950,256,1995,8192,512,ck,0,0,26.8773,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,622.65,1410.18,0.0 +gfx950,256,2002,8192,512,ck,0,0,26.8937,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,624.46,1413.72,0.0 +gfx950,256,2005,8192,512,ck,0,0,26.8943,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,625.38,1415.57,0.0 +gfx950,256,2028,8192,512,ck,0,0,26.906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,632.28,1429.4,0.0 +gfx950,256,2046,8192,512,ck,0,0,26.9197,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,637.57,1439.97,0.0 +gfx950,256,2009,8192,512,ck,0,0,26.9255,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,625.9,1416.44,0.0 +gfx950,256,2030,8192,512,ck,0,0,26.9394,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,632.12,1428.88,0.0 +gfx950,256,2033,8192,512,ck,0,0,26.9458,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,632.9,1430.42,0.0 +gfx950,256,2040,8192,512,ck,0,0,26.9534,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,634.9,1434.41,0.0 +gfx950,256,2024,8192,512,ck,0,0,26.9597,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,629.77,1424.04,0.0 +gfx950,256,2043,8192,512,ck,0,0,26.9812,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,635.18,1434.81,0.0 +gfx950,256,2029,8192,512,ck,0,0,26.9815,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,630.82,1426.02,0.0 +gfx950,256,2045,8192,512,ck,0,0,27.0197,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,634.9,1434.01,0.0 +gfx950,256,2026,8192,512,ck,0,0,27.0243,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,628.89,1421.89,0.0 +gfx950,256,2042,8192,512,ck,0,0,27.0549,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,633.14,1430.27,0.0 +gfx950,256,2041,8192,512,ck,0,0,27.0788,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,632.27,1428.39,0.0 +gfx950,256,2087,8192,512,ck,18,0,27.1083,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,645.82,1455.5,0.0 +gfx950,256,2047,8192,512,ck,0,0,27.124,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,633.07,1429.75,0.0 +gfx950,256,2088,8192,512,ck,18,0,27.1492,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,645.15,1453.93,0.0 +gfx950,256,2075,8192,512,ck,18,0,27.1923,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,640.12,1443.55,0.0 +gfx950,256,2068,8192,512,ck,18,0,27.2188,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,637.34,1437.8,0.0 +gfx950,256,2067,8192,512,ck,18,0,27.2328,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,636.7,1436.44,0.0 +gfx950,256,2053,8192,512,ck,18,0,27.2448,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,632.11,1427.13,0.0 +gfx950,256,2059,8192,512,ck,18,0,27.2612,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,633.58,1429.99,0.0 +gfx950,256,2058,8192,512,ck,18,0,27.2765,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,632.92,1428.57,0.0 +gfx950,256,2081,8192,512,ck,18,0,27.3172,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,639.04,1440.66,0.0 +gfx950,256,2136,8192,512,ck,18,0,27.3346,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,655.51,1473.74,0.0 +gfx950,256,2066,8192,512,ck,18,0,27.3672,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,633.27,1428.77,0.0 +gfx950,256,272,7168,4096,ck,12,0,27.3906,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,583.12,1254.94,0.0 +gfx950,256,2085,8192,512,ck,18,0,27.4036,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,638.25,1438.59,0.0 +gfx950,256,2094,8192,512,ck,18,0,27.4313,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,640.35,1442.68,0.0 +gfx950,256,2120,8192,512,ck,18,0,27.4322,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,648.28,1458.64,0.0 +gfx950,256,2051,8192,512,ck,18,0,27.4574,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,626.61,1414.85,0.0 +gfx950,256,2079,8192,512,ck,18,0,27.464,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,635.01,1431.73,0.0 +gfx950,256,2138,8192,512,ck,18,0,27.4692,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,652.91,1467.75,0.0 +gfx950,256,2049,8192,512,ck,18,0,27.474,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,625.62,1412.76,0.0 +gfx950,256,2111,8192,512,ck,18,0,27.4921,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,644.13,1449.94,0.0 +gfx950,256,2114,8192,512,ck,18,0,27.4965,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,644.94,1451.55,0.0 +gfx950,256,2124,8192,512,ck,18,0,27.5128,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,647.6,1456.83,0.0 +gfx950,256,2139,8192,512,ck,18,0,27.5178,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,652.06,1465.77,0.0 +gfx950,256,2097,8192,512,ck,18,0,27.5286,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,639.0,1439.42,0.0 +gfx950,256,2113,8192,512,ck,18,0,27.5345,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,643.74,1448.93,0.0 +gfx950,256,2083,8192,512,ck,18,0,27.55,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,634.25,1429.72,0.0 +gfx950,256,2054,8192,512,ck,18,0,27.5797,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.74,1410.41,0.0 +gfx950,256,2080,8192,512,ck,18,0,27.586,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,632.51,1426.01,0.0 +gfx950,256,2065,8192,512,ck,18,0,27.5884,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,627.89,1416.7,0.0 +gfx950,256,2134,8192,512,ck,18,0,27.5945,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,648.73,1458.64,0.0 +gfx950,256,2105,8192,512,ck,18,0,27.6113,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,639.52,1440.0,0.0 +gfx950,256,2057,8192,512,ck,18,0,27.6584,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,623.87,1408.23,0.0 +gfx950,256,2169,8192,512,ck,18,0,27.6796,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,657.34,1475.52,0.0 +gfx950,256,2091,8192,512,ck,18,0,27.6841,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,633.6,1427.67,0.0 +gfx950,256,2143,8192,512,ck,18,0,27.7133,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,648.67,1457.87,0.0 +gfx950,256,2095,8192,512,ck,18,0,27.718,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,634.03,1428.37,0.0 +gfx950,256,2106,8192,512,ck,18,0,27.7257,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,637.19,1434.67,0.0 +gfx950,256,2132,8192,512,ck,18,0,27.737,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,644.79,1449.93,0.0 +gfx950,256,2102,8192,512,ck,18,0,27.7376,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,635.7,1431.62,0.0 +gfx950,256,2100,8192,512,ck,18,0,27.7456,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,634.91,1429.99,0.0 +gfx950,256,2129,8192,512,ck,18,0,27.7707,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,643.1,1446.34,0.0 +gfx950,256,2098,8192,512,ck,18,0,27.7796,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,633.53,1427.02,0.0 +gfx950,256,2071,8192,512,ck,18,0,27.8153,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,624.58,1408.79,0.0 +gfx950,256,2166,8192,512,ck,18,0,27.8185,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,653.15,1466.33,0.0 +gfx950,256,2127,8192,512,ck,18,0,27.8216,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,641.32,1442.48,0.0 +gfx950,256,2146,8192,512,ck,18,0,27.8401,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,646.62,1453.05,0.0 +gfx950,256,2145,8192,512,ck,18,0,27.8733,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,645.55,1450.72,0.0 +gfx950,256,2070,8192,512,ck,18,0,27.8773,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,622.89,1405.05,0.0 +gfx950,256,2163,8192,512,ck,18,0,27.8816,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,650.77,1461.19,0.0 +gfx950,256,2148,8192,512,ck,18,0,27.8829,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,646.23,1452.03,0.0 +gfx950,256,2172,8192,512,ck,18,0,27.9148,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,652.7,1464.9,0.0 +gfx950,256,2141,8192,512,ck,18,0,27.9377,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,642.86,1444.95,0.0 +gfx950,256,2133,8192,512,ck,18,0,27.958,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,639.99,1439.07,0.0 +gfx950,256,2099,8192,512,ck,18,0,27.9645,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,629.64,1418.19,0.0 +gfx950,256,2152,8192,512,ck,18,0,28.0041,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,644.63,1448.16,0.0 +gfx950,256,2137,8192,512,ck,18,0,28.0078,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,640.05,1438.92,0.0 +gfx950,256,2130,8192,512,ck,18,0,28.0209,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,637.66,1434.03,0.0 +gfx950,256,2174,8192,512,ck,18,0,28.078,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,649.51,1457.59,0.0 +gfx950,256,2104,8192,512,ck,18,0,28.0928,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,628.26,1414.72,0.0 +gfx950,256,2131,8192,512,ck,18,0,28.1172,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,635.77,1429.72,0.0 +gfx950,256,2122,8192,512,ck,18,0,28.118,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,633.07,1424.27,0.0 +gfx950,256,2214,8192,512,ck,18,0,28.2789,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,656.76,1471.13,0.0 +gfx950,256,2183,8192,512,ck,18,0,28.3019,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,647.04,1451.43,0.0 +gfx950,256,2210,8192,512,ck,18,0,28.404,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,652.68,1462.28,0.0 +gfx950,256,2208,8192,512,ck,18,0,28.4465,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,651.12,1458.9,0.0 +gfx950,256,2235,8192,512,ck,18,0,28.5544,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,656.59,1469.37,0.0 +gfx950,256,2182,8192,512,ck,18,0,28.5702,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,640.67,1437.21,0.0 +gfx950,256,2237,8192,512,ck,18,0,28.6473,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,655.05,1465.78,0.0 +gfx950,256,2232,8192,512,ck,18,0,28.686,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,652.7,1460.86,0.0 +gfx950,256,2221,8192,512,ck,18,0,28.6892,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,649.41,1454.22,0.0 +gfx950,256,2212,8192,512,ck,18,0,28.7157,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,646.18,1447.58,0.0 +gfx950,256,2180,8192,512,ck,18,0,28.754,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,635.99,1426.85,0.0 +gfx950,256,2238,8192,512,ck,18,0,28.7728,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,652.48,1459.97,0.0 +gfx950,256,2186,8192,512,ck,18,0,28.7782,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,637.2,1429.17,0.0 +gfx950,256,2193,8192,512,ck,18,0,28.7801,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,639.2,1433.19,0.0 +gfx950,256,2262,8192,512,ck,18,0,28.7813,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,659.28,1473.63,0.0 +gfx950,256,2263,8192,512,ck,18,0,28.7945,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,659.27,1473.54,0.0 +gfx950,256,2177,8192,512,ck,18,0,28.7953,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,634.2,1423.04,0.0 +gfx950,256,2272,8192,512,ck,18,0,28.7969,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,661.84,1478.7,0.0 +gfx950,256,2179,8192,512,ck,18,0,28.7981,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,634.72,1424.08,0.0 +gfx950,256,2246,8192,512,ck,18,0,28.8141,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,653.87,1462.57,0.0 +gfx950,256,2219,8192,512,ck,18,0,28.8144,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,646.01,1446.73,0.0 +gfx950,256,2187,8192,512,ck,18,0,28.8145,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,636.69,1427.96,0.0 +gfx950,256,2253,8192,512,ck,18,0,28.8162,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,655.86,1466.57,0.0 +gfx950,256,2231,8192,512,ck,18,0,28.8253,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,649.26,1453.21,0.0 +gfx950,256,2178,8192,512,ck,18,0,28.8416,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,633.47,1421.34,0.0 +gfx950,256,2220,8192,512,ck,18,0,28.8425,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,645.67,1445.9,0.0 +gfx950,256,2249,8192,512,ck,18,0,28.8449,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,654.05,1462.77,0.0 +gfx950,256,2239,8192,512,ck,18,0,28.8924,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,650.07,1454.52,0.0 +gfx950,256,2216,8192,512,ck,18,0,28.9029,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,643.16,1440.54,0.0 +gfx950,256,2225,8192,512,ck,18,0,28.9032,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,645.76,1445.79,0.0 +gfx950,256,2184,8192,512,ck,18,0,28.9137,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,633.63,1421.3,0.0 +gfx950,256,2260,8192,512,ck,18,0,28.9273,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,655.38,1465.03,0.0 +gfx950,256,2297,8192,512,ck,18,0,28.9325,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,665.99,1486.37,0.0 +gfx950,256,2284,8192,512,ck,18,0,28.9564,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,661.67,1477.56,0.0 +gfx950,256,2240,8192,512,ck,17,0,28.9621,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,648.8,1451.6,0.0 +gfx950,256,2291,8192,512,ck,18,0,28.9709,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,663.37,1480.9,0.0 +gfx950,256,2255,8192,512,ck,18,0,28.9921,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,652.46,1458.84,0.0 +gfx950,256,2198,8192,512,ck,18,0,28.9968,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,635.87,1425.39,0.0 +gfx950,256,2302,8192,512,ck,18,0,28.9985,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,665.92,1485.9,0.0 +gfx950,256,2192,8192,512,ck,18,0,29.0205,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,633.62,1420.73,0.0 +gfx950,256,2244,8192,512,ck,18,0,29.0577,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,647.82,1449.15,0.0 +gfx950,256,2292,8192,512,ck,18,0,29.0917,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,660.9,1475.33,0.0 +gfx950,256,2242,8192,512,ck,18,0,29.0925,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,646.46,1446.25,0.0 +gfx950,256,2257,8192,512,ck,18,0,29.0969,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,650.69,1454.75,0.0 +gfx950,256,2250,8192,512,ck,18,0,29.1017,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,648.57,1450.44,0.0 +gfx950,256,2275,8192,512,ck,18,0,29.1053,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,655.69,1464.77,0.0 +gfx950,256,2286,8192,512,ck,18,0,29.1141,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,658.66,1470.72,0.0 +gfx950,256,2204,8192,512,ck,18,0,29.1213,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,634.88,1422.78,0.0 +gfx950,256,2190,8192,512,ck,18,0,29.1281,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,630.7,1414.32,0.0 +gfx950,256,2224,8192,512,ck,0,0,29.1368,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,640.3,1433.62,0.0 +gfx950,256,2290,8192,512,ck,18,0,29.1725,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,658.49,1470.09,0.0 +gfx950,256,2196,8192,512,ck,18,0,29.1885,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,631.12,1414.87,0.0 +gfx950,256,2207,8192,512,ck,0,0,29.2165,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,633.67,1419.87,0.0 +gfx950,256,2269,8192,512,ck,18,0,29.2201,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,651.39,1455.55,0.0 +gfx950,256,2248,8192,512,ck,0,0,29.2331,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,645.08,1442.77,0.0 +gfx950,256,2304,8192,512,ck,0,0,29.2418,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,660.95,1474.69,0.0 +gfx950,256,2245,8192,512,ck,18,0,29.2421,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,644.02,1440.59,0.0 +gfx950,256,2300,8192,512,ck,18,0,29.2641,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,659.3,1471.26,0.0 +gfx950,256,2278,8192,512,ck,0,0,29.3114,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,651.94,1456.2,0.0 +gfx950,256,2295,8192,512,ck,18,0,29.4077,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,654.65,1461.2,0.0 +gfx950,256,2285,8192,512,ck,0,0,29.4124,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,651.7,1455.23,0.0 +gfx950,256,2298,8192,512,ck,0,0,29.4283,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,655.05,1461.9,0.0 +gfx950,256,1301,6144,1536,ck,14,0,29.4757,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,833.08,930.33,0.0 +gfx950,256,2283,8192,512,ck,18,0,29.5213,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,648.72,1448.71,0.0 +gfx950,256,2364,8192,512,ck,0,0,29.5375,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,671.37,1494.25,0.0 +gfx950,256,2352,8192,512,ck,0,0,29.5605,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,667.44,1486.23,0.0 +gfx950,256,2430,8192,512,ck,0,0,29.5903,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.89,1529.27,0.0 +gfx950,256,2428,8192,512,ck,0,0,29.6146,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.75,1526.87,0.0 +gfx950,256,2374,8192,512,ck,0,0,29.6228,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,672.27,1495.65,0.0 +gfx950,256,2310,8192,512,ck,0,0,29.6296,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,654.0,1458.81,0.0 +gfx950,256,2308,8192,512,ck,0,0,29.6406,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,653.19,1457.13,0.0 +gfx950,256,2347,8192,512,ck,0,0,29.6434,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,664.16,1479.22,0.0 +gfx950,256,2350,8192,512,ck,0,0,29.6451,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,664.97,1480.85,0.0 +gfx950,256,2475,8192,512,ck,0,0,29.6467,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.31,1552.01,0.0 +gfx950,256,2341,8192,512,ck,0,0,29.6519,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,662.28,1475.38,0.0 +gfx950,256,2469,8192,512,ck,0,0,29.6719,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.02,1547.27,0.0 +gfx950,256,2348,8192,512,ck,0,0,29.6908,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,663.39,1477.43,0.0 +gfx950,256,2389,8192,512,ck,0,0,29.6999,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,674.76,1500.3,0.0 +gfx950,256,2353,8192,512,ck,0,0,29.705,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,664.48,1479.57,0.0 +gfx950,256,2485,8192,512,ck,0,0,29.714,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.54,1554.18,0.0 +gfx950,256,2349,8192,512,ck,0,0,29.715,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,663.13,1476.8,0.0 +gfx950,256,2328,8192,512,ck,0,0,29.7189,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,657.11,1464.66,0.0 +gfx950,256,2336,8192,512,ck,0,0,29.7274,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,659.18,1468.79,0.0 +gfx950,256,2529,8192,512,ck,0,0,29.7295,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.59,1578.37,0.0 +gfx950,256,2313,8192,512,ck,0,0,29.738,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,652.46,1455.2,0.0 +gfx950,256,2306,8192,512,ck,0,0,29.7437,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,650.36,1450.95,0.0 +gfx950,256,1334,6144,1536,ck,14,0,29.7585,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,846.09,936.82,0.0 +gfx950,256,2373,8192,512,ck,0,0,29.7585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,668.92,1488.26,0.0 +gfx950,256,2415,8192,512,ck,0,0,29.7613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,680.7,1511.97,0.0 +gfx950,256,2305,8192,512,ck,0,0,29.7633,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,649.65,1449.42,0.0 +gfx950,256,2309,8192,512,ck,0,0,29.7633,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,650.78,1451.69,0.0 +gfx950,256,2423,8192,512,ck,0,0,29.7634,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,682.91,1516.4,0.0 +gfx950,256,2456,8192,512,ck,0,0,29.765,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.17,1535.05,0.0 +gfx950,256,2324,8192,512,ck,0,0,29.7704,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,654.85,1459.86,0.0 +gfx950,256,2439,8192,512,ck,0,0,29.7737,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.18,1524.96,0.0 +gfx950,256,2462,8192,512,ck,0,0,29.7829,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.44,1537.54,0.0 +gfx950,256,2397,8192,512,ck,0,0,29.7843,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.1,1500.59,0.0 +gfx950,256,2369,8192,512,ck,0,0,29.7855,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,667.19,1484.65,0.0 +gfx950,256,2311,8192,512,ck,0,0,29.7866,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,650.83,1451.69,0.0 +gfx950,256,2379,8192,512,ck,0,0,29.7914,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,669.87,1490.02,0.0 +gfx950,256,2393,8192,512,ck,0,0,29.7948,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,673.74,1497.79,0.0 +gfx950,256,2383,8192,512,ck,0,0,29.8038,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,670.72,1491.67,0.0 +gfx950,256,2330,8192,512,ck,0,0,29.8041,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,655.8,1461.61,0.0 +gfx950,256,2388,8192,512,ck,0,0,29.805,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,672.1,1494.45,0.0 +gfx950,256,2361,8192,512,ck,0,0,29.8051,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,664.5,1479.13,0.0 +gfx950,256,2413,8192,512,ck,0,0,29.8052,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.13,1508.61,0.0 +gfx950,256,2426,8192,512,ck,0,0,29.8104,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,682.67,1515.71,0.0 +gfx950,256,2317,8192,512,ck,0,0,29.8159,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,651.88,1453.67,0.0 +gfx950,256,2370,8192,512,ck,0,0,29.8271,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,666.54,1483.14,0.0 +gfx950,256,2318,8192,512,ck,0,0,29.8295,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,651.86,1453.57,0.0 +gfx950,256,2520,8192,512,ck,0,0,29.8381,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,708.47,1567.53,0.0 +gfx950,256,2314,8192,512,ck,0,0,29.8388,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,650.54,1450.85,0.0 +gfx950,256,2381,8192,512,ck,0,0,29.8389,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,669.37,1488.78,0.0 +gfx950,256,2396,8192,512,ck,0,0,29.8438,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,673.48,1497.03,0.0 +gfx950,256,2325,8192,512,ck,0,0,29.8503,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,653.38,1456.52,0.0 +gfx950,256,2493,8192,512,ck,0,0,29.8508,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.58,1551.58,0.0 +gfx950,256,2526,8192,512,ck,0,0,29.8523,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.82,1570.18,0.0 +gfx950,256,2404,8192,512,ck,0,0,29.8615,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.32,1500.67,0.0 +gfx950,256,2360,8192,512,ck,0,0,29.8802,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,662.55,1474.85,0.0 +gfx950,256,2394,8192,512,ck,0,0,29.883,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,672.03,1493.94,0.0 +gfx950,256,2355,8192,512,ck,0,0,29.8856,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,661.03,1471.76,0.0 +gfx950,256,2422,8192,512,ck,0,0,29.8872,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.8,1509.56,0.0 +gfx950,256,2437,8192,512,ck,0,0,29.8879,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.99,1518.0,0.0 +gfx950,256,2407,8192,512,ck,0,0,29.8914,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.49,1500.87,0.0 +gfx950,256,2420,8192,512,ck,0,0,29.8922,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.12,1508.17,0.0 +gfx950,256,2315,8192,512,ck,18,0,29.8997,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,649.49,1448.46,0.0 +gfx950,256,2434,8192,512,ck,0,0,29.9031,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,682.8,1515.53,0.0 +gfx950,256,2385,8192,512,ck,0,0,29.9088,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,668.93,1487.56,0.0 +gfx950,256,2340,8192,512,ck,0,0,29.9112,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,656.25,1462.03,0.0 +gfx950,256,2358,8192,512,ck,0,0,29.9115,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,661.3,1472.18,0.0 +gfx950,256,2403,8192,512,ck,0,0,29.9116,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,673.91,1497.59,0.0 +gfx950,256,2329,8192,512,ck,0,0,29.9122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,653.15,1455.76,0.0 +gfx950,256,2398,8192,512,ck,0,0,29.9193,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,672.34,1494.38,0.0 +gfx950,256,2455,8192,512,ck,0,0,29.9201,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.3,1526.53,0.0 +gfx950,256,2344,8192,512,ck,0,0,29.9215,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,657.15,1463.78,0.0 +gfx950,256,2399,8192,512,ck,0,0,29.9255,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,672.48,1494.64,0.0 +gfx950,256,2467,8192,512,ck,0,0,29.9286,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.47,1532.87,0.0 +gfx950,256,2386,8192,512,ck,0,0,29.9379,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,668.56,1486.68,0.0 +gfx950,256,2473,8192,512,ck,0,0,29.9432,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.81,1535.51,0.0 +gfx950,256,2402,8192,512,ck,0,0,29.9468,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,672.84,1495.27,0.0 +gfx950,256,2484,8192,512,ck,0,0,29.9532,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.66,1541.2,0.0 +gfx950,256,2346,8192,512,ck,0,0,29.9537,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,657.0,1463.34,0.0 +gfx950,256,2495,8192,512,ck,0,0,29.9564,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.67,1547.24,0.0 +gfx950,256,2522,8192,512,ck,0,0,29.9625,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.08,1562.15,0.0 +gfx950,256,1333,6144,1536,ck,14,0,29.9645,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,839.64,929.92,0.0 +gfx950,256,2450,8192,512,ck,0,0,29.9694,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.77,1521.2,0.0 +gfx950,256,2530,8192,512,ck,0,0,29.9785,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.95,1565.83,0.0 +gfx950,256,2546,8192,512,ck,0,0,29.9788,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,712.42,1574.83,0.0 +gfx950,256,2478,8192,512,ck,0,0,29.9801,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.36,1536.44,0.0 +gfx950,256,2452,8192,512,ck,0,0,29.983,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.02,1521.64,0.0 +gfx950,256,2447,8192,512,ck,0,0,29.9989,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.26,1518.02,0.0 +gfx950,256,304,7168,4096,ck,18,0,29.9993,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,595.05,1165.48,0.0 +gfx950,256,2534,8192,512,ck,0,0,30.0042,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,708.46,1566.74,0.0 +gfx950,256,2444,8192,512,ck,0,0,30.0098,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.17,1515.78,0.0 +gfx950,256,2401,8192,512,ck,0,0,30.0224,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,670.87,1490.94,0.0 +gfx950,256,2480,8192,512,ck,0,0,30.0262,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.85,1535.21,0.0 +gfx950,256,2477,8192,512,ck,0,0,30.0289,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.95,1533.38,0.0 +gfx950,256,2438,8192,512,ck,0,0,30.0299,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,681.04,1511.39,0.0 +gfx950,256,2479,8192,512,ck,0,0,30.0323,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.43,1534.33,0.0 +gfx950,256,2504,8192,512,ck,0,0,30.0327,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.41,1548.38,0.0 +gfx950,256,2496,8192,512,ck,0,0,30.0356,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.1,1543.73,0.0 +gfx950,256,2516,8192,512,ck,0,0,30.0359,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.68,1554.96,0.0 +gfx950,256,2542,8192,512,ck,0,0,30.036,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.94,1569.58,0.0 +gfx950,256,2532,8192,512,ck,0,0,30.0434,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.98,1563.57,0.0 +gfx950,256,2481,8192,512,ck,0,0,30.0484,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.62,1534.63,0.0 +gfx950,256,2471,8192,512,ck,0,0,30.0581,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.61,1528.52,0.0 +gfx950,256,2524,8192,512,ck,0,0,30.071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.1,1557.64,0.0 +gfx950,256,2544,8192,512,ck,0,0,30.0732,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.62,1568.76,0.0 +gfx950,256,2453,8192,512,ck,0,0,30.0793,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.1,1517.33,0.0 +gfx950,256,2503,8192,512,ck,0,0,30.0805,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.02,1545.35,0.0 +gfx950,256,2465,8192,512,ck,0,0,30.0906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.19,1523.5,0.0 +gfx950,256,2540,8192,512,ck,0,0,30.0951,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.99,1565.38,0.0 +gfx950,256,2436,8192,512,ck,0,0,30.1038,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,678.81,1506.55,0.0 +gfx950,256,2541,8192,512,ck,0,0,30.1151,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.8,1564.9,0.0 +gfx950,256,2505,8192,512,ck,0,0,30.1238,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.57,1544.25,0.0 +gfx950,256,2487,8192,512,ck,0,0,30.1445,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.08,1533.1,0.0 +gfx950,256,2490,8192,512,ck,0,0,30.1451,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.9,1534.76,0.0 +gfx950,256,2543,8192,512,ck,0,0,30.1923,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.55,1562.02,0.0 +gfx950,256,2539,8192,512,ck,0,0,30.1997,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.26,1559.39,0.0 +gfx950,256,2446,8192,512,ck,0,0,30.2226,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,678.91,1506.22,0.0 +gfx950,256,2525,8192,512,ck,0,0,30.2288,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.7,1550.07,0.0 +gfx950,256,2491,8192,512,ck,0,0,30.2357,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.1,1530.72,0.0 +gfx950,256,2441,8192,512,ck,0,0,30.2381,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,677.18,1502.66,0.0 +gfx950,256,2433,8192,512,ck,0,0,30.2393,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,674.93,1498.13,0.0 +gfx950,256,2521,8192,512,ck,0,0,30.3548,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.68,1541.41,0.0 +gfx950,256,272,7168,4608,ck,12,0,30.3627,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,591.79,1257.56,0.0 +gfx950,256,336,7168,4096,ck,18,0,30.666,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,643.38,1159.37,0.0 +gfx950,256,368,7168,4096,ck,18,0,31.518,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,685.61,1146.74,0.0 +gfx950,256,304,7168,4608,ck,18,0,32.4764,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,618.37,1194.38,0.0 +gfx950,256,2576,8192,512,ck,18,0,32.6225,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,662.4,1462.75,0.0 +gfx950,256,2597,8192,512,ck,18,0,32.6529,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,667.18,1472.25,0.0 +gfx950,256,400,7168,4096,ck,0,0,32.6624,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.12,1124.62,0.0 +gfx950,256,2571,8192,512,ck,18,0,32.6633,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,660.29,1458.33,0.0 +gfx950,256,2586,8192,512,ck,18,0,32.6773,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,663.85,1465.46,0.0 +gfx950,256,2573,8192,512,ck,18,0,32.7141,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,659.77,1457.1,0.0 +gfx950,256,2613,8192,512,ck,18,0,32.7613,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,669.06,1475.63,0.0 +gfx950,256,2585,8192,512,ck,18,0,32.765,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,661.82,1461.02,0.0 +gfx950,256,2611,8192,512,ck,18,0,32.7957,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,667.85,1473.05,0.0 +gfx950,256,2641,8192,512,ck,18,0,32.8425,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,674.56,1486.39,0.0 +gfx950,256,2616,8192,512,ck,18,0,32.8949,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,667.11,1471.18,0.0 +gfx950,256,2577,8192,512,ck,18,0,32.9165,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,656.74,1450.19,0.0 +gfx950,256,2646,8192,512,ck,18,0,32.9197,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,674.25,1485.47,0.0 +gfx950,256,2666,8192,512,ck,18,0,32.9381,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,678.97,1494.9,0.0 +gfx950,256,2602,8192,512,ck,18,0,33.0157,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,661.11,1458.63,0.0 +gfx950,256,2638,8192,512,ck,18,0,33.0425,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,669.72,1475.86,0.0 +gfx950,256,2671,8192,512,ck,18,0,33.0485,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,677.97,1492.46,0.0 +gfx950,256,2581,8192,512,ck,18,0,33.0513,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,655.07,1446.32,0.0 +gfx950,256,2656,8192,512,ck,18,0,33.0593,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,673.94,1484.3,0.0 +gfx950,256,432,7168,4096,ck,0,0,33.0674,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.13,1128.69,0.0 +gfx950,256,2665,8192,512,ck,18,0,33.073,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,675.95,1488.29,0.0 +gfx950,256,2679,8192,512,ck,18,0,33.0797,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,679.36,1495.14,0.0 +gfx950,256,2615,8192,512,ck,18,0,33.0897,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,662.93,1462.01,0.0 +gfx950,256,2670,8192,512,ck,18,0,33.0977,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,676.71,1489.73,0.0 +gfx950,256,2603,8192,512,ck,18,0,33.1053,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,659.58,1455.19,0.0 +gfx950,256,2623,8192,512,ck,18,0,33.1061,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,664.63,1465.36,0.0 +gfx950,256,2677,8192,512,ck,18,0,33.1137,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,678.16,1492.58,0.0 +gfx950,256,2574,8192,512,ck,18,0,33.1141,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,652.06,1440.01,0.0 +gfx950,256,2636,8192,512,ck,18,0,33.1438,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,667.16,1470.33,0.0 +gfx950,256,2649,8192,512,ck,18,0,33.1665,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,670.0,1475.94,0.0 +gfx950,256,2676,8192,512,ck,18,0,33.1877,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,676.39,1488.74,0.0 +gfx950,256,2590,8192,512,ck,18,0,33.1969,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,654.47,1444.56,0.0 +gfx950,256,2593,8192,512,ck,18,0,33.2033,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,655.11,1445.81,0.0 +gfx950,256,2683,8192,512,ck,18,0,33.2053,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,677.8,1491.52,0.0 +gfx950,256,2580,8192,512,ck,18,0,33.2057,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,651.77,1439.09,0.0 +gfx950,256,2661,8192,512,ck,18,0,33.2068,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,672.21,1480.26,0.0 +gfx950,256,2612,8192,512,ck,18,0,33.2081,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,659.81,1455.27,0.0 +gfx950,256,2655,8192,512,ck,18,0,33.2505,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,669.82,1475.26,0.0 +gfx950,256,2627,8192,512,ck,18,0,33.2693,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,662.38,1460.21,0.0 +gfx950,256,2579,8192,512,ck,18,0,33.2929,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,649.81,1434.81,0.0 +gfx950,256,2625,8192,512,ck,18,0,33.3185,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,660.9,1457.04,0.0 +gfx950,256,2601,8192,512,ck,18,0,33.3245,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,654.74,1444.61,0.0 +gfx950,256,2685,8192,512,ck,18,0,33.3598,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,675.17,1485.62,0.0 +gfx950,256,2651,8192,512,ck,18,0,33.3945,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,665.92,1466.88,0.0 +gfx950,256,464,7168,4096,ck,0,0,33.4122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.46,1134.69,0.0 +gfx950,256,2564,8192,512,ck,18,0,33.4153,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,643.67,1421.97,0.0 +gfx950,256,336,7168,4608,ck,18,0,33.4197,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,664.17,1178.81,0.0 +gfx950,256,2631,8192,512,ck,18,0,33.4245,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,660.31,1455.45,0.0 +gfx950,256,2658,8192,512,ck,18,0,33.4258,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,667.06,1469.04,0.0 +gfx950,256,2629,8192,512,ck,18,0,33.4325,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,659.65,1454.09,0.0 +gfx950,256,2568,8192,512,ck,18,0,33.4614,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,643.78,1422.03,0.0 +gfx950,256,368,7168,4608,ck,18,0,33.4814,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,726.08,1194.74,0.0 +gfx950,256,2663,8192,512,ck,18,0,33.4874,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,667.08,1468.86,0.0 +gfx950,256,2686,8192,512,ck,18,0,33.491,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,672.77,1480.31,0.0 +gfx950,256,2674,8192,512,ck,18,0,33.4977,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,669.63,1473.96,0.0 +gfx950,256,2607,8192,512,ck,18,0,33.5013,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,652.78,1440.01,0.0 +gfx950,256,2632,8192,512,ck,18,0,33.5145,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,658.78,1452.05,0.0 +gfx950,256,2588,8192,512,ck,18,0,33.5341,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,647.39,1429.03,0.0 +gfx950,256,2672,8192,512,ck,18,0,33.5654,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,667.78,1469.98,0.0 +gfx950,256,2648,8192,512,ck,18,0,33.6205,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,660.7,1455.51,0.0 +gfx950,256,2642,8192,512,ck,18,0,33.6329,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,658.96,1451.96,0.0 +gfx950,256,2608,8192,512,ck,18,0,33.6398,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,650.35,1434.58,0.0 +gfx950,256,2628,8192,512,ck,18,0,33.6709,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,654.73,1443.29,0.0 +gfx950,256,2635,8192,512,ck,18,0,33.6965,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,655.97,1445.71,0.0 +gfx950,256,2659,8192,512,ck,18,0,33.7009,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,661.86,1457.55,0.0 +gfx950,256,2680,8192,512,ck,18,0,33.7058,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,666.99,1467.87,0.0 +gfx950,256,1351,6144,1536,ck,18,0,33.7161,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,756.29,833.83,0.0 +gfx950,256,2650,8192,512,ck,18,0,33.8005,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,657.68,1448.76,0.0 +gfx950,256,2737,8192,512,ck,18,0,33.8033,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,679.21,1492.12,0.0 +gfx950,256,2727,8192,512,ck,18,0,33.8046,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,676.7,1487.07,0.0 +gfx950,256,2697,8192,512,ck,18,0,33.8098,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,669.16,1471.85,0.0 +gfx950,256,2624,8192,512,ck,18,0,33.9446,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,648.46,1429.67,0.0 +gfx950,256,2733,8192,512,ck,18,0,33.991,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,674.47,1481.89,0.0 +gfx950,256,2681,8192,512,ck,0,0,34.0014,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,661.44,1455.6,0.0 +gfx950,256,2639,8192,512,ck,0,0,34.009,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,650.93,1434.41,0.0 +gfx950,256,2718,8192,512,ck,18,0,34.017,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,670.26,1473.31,0.0 +gfx950,256,2773,8192,512,ck,0,0,34.0291,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.58,1500.1,0.0 +gfx950,256,2801,8192,512,ck,0,0,34.0438,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.18,1513.34,0.0 +gfx950,256,2750,8192,512,ck,18,0,34.0534,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,677.43,1487.61,0.0 +gfx950,256,2692,8192,512,ck,0,0,34.0636,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,662.94,1458.4,0.0 +gfx950,256,2719,8192,512,ck,18,0,34.1001,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,668.87,1470.22,0.0 +gfx950,256,2724,8192,512,ck,18,0,34.1085,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,669.94,1472.33,0.0 +gfx950,256,2761,8192,512,ck,0,0,34.1103,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.0,1490.58,0.0 +gfx950,256,2738,8192,512,ck,18,0,34.115,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,673.25,1478.98,0.0 +gfx950,256,2732,8192,512,ck,18,0,34.1198,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,671.68,1475.81,0.0 +gfx950,256,2694,8192,512,ck,18,0,34.1265,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,662.21,1456.7,0.0 +gfx950,256,2712,8192,512,ck,0,0,34.1398,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,666.37,1465.04,0.0 +gfx950,256,2710,8192,512,ck,18,0,34.1402,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,665.88,1464.04,0.0 +gfx950,256,2735,8192,512,ck,18,0,34.1433,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,671.96,1476.27,0.0 +gfx950,256,2744,8192,512,ck,0,0,34.1471,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,674.09,1480.56,0.0 +gfx950,256,496,7168,4096,ck,0,0,34.1558,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,852.72,1127.26,0.0 +gfx950,256,2704,8192,512,ck,0,0,34.1759,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,663.71,1459.54,0.0 +gfx950,256,2706,8192,512,ck,0,0,34.1864,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,663.99,1460.08,0.0 +gfx950,256,2705,8192,512,ck,0,0,34.2071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,663.35,1458.7,0.0 +gfx950,256,2725,8192,512,ck,18,0,34.2094,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,668.21,1468.48,0.0 +gfx950,256,2760,8192,512,ck,0,0,34.2105,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,676.77,1485.72,0.0 +gfx950,256,2758,8192,512,ck,0,0,34.2146,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,676.2,1484.56,0.0 +gfx950,256,2708,8192,512,ck,0,0,34.2299,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,663.64,1459.21,0.0 +gfx950,256,2721,8192,512,ck,18,0,34.2437,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,666.56,1465.04,0.0 +gfx950,256,2728,8192,512,ck,0,0,34.2464,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,668.22,1468.38,0.0 +gfx950,256,2756,8192,512,ck,0,0,34.2551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,674.91,1481.81,0.0 +gfx950,256,2734,8192,512,ck,0,0,34.2554,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,669.51,1470.95,0.0 +gfx950,256,2757,8192,512,ck,0,0,34.2599,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.06,1482.1,0.0 +gfx950,256,2826,8192,512,ck,0,0,34.2695,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.76,1515.7,0.0 +gfx950,256,2787,8192,512,ck,0,0,34.2802,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,682.0,1496.01,0.0 +gfx950,256,2765,8192,512,ck,0,0,34.2818,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,676.58,1485.1,0.0 +gfx950,256,2794,8192,512,ck,0,0,34.2921,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.47,1498.94,0.0 +gfx950,256,2747,8192,512,ck,18,0,34.293,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,671.96,1475.74,0.0 +gfx950,256,2751,8192,512,ck,18,0,34.299,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,672.82,1477.45,0.0 +gfx950,256,2784,8192,512,ck,0,0,34.3014,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,680.84,1493.61,0.0 +gfx950,256,2798,8192,512,ck,0,0,34.3021,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.25,1500.47,0.0 +gfx950,256,2695,8192,512,ck,0,0,34.3092,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,658.93,1449.44,0.0 +gfx950,256,2743,8192,512,ck,0,0,34.3181,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,670.49,1472.69,0.0 +gfx950,256,2766,8192,512,ck,0,0,34.3311,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.86,1483.45,0.0 +gfx950,256,2715,8192,512,ck,18,0,34.3417,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,663.19,1457.91,0.0 +gfx950,256,2795,8192,512,ck,0,0,34.3454,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,682.66,1497.1,0.0 +gfx950,256,2785,8192,512,ck,18,0,34.3457,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,680.21,1492.17,0.0 +gfx950,256,2771,8192,512,ck,0,0,34.3518,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,676.67,1485.02,0.0 +gfx950,256,2723,8192,512,ck,0,0,34.3618,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,664.76,1460.99,0.0 +gfx950,256,2783,8192,512,ck,0,0,34.3636,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.37,1490.41,0.0 +gfx950,256,2789,8192,512,ck,0,0,34.3767,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,680.57,1492.79,0.0 +gfx950,256,2793,8192,512,ck,0,0,34.3816,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,681.45,1494.54,0.0 +gfx950,256,2700,8192,512,ck,0,0,34.385,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,658.7,1448.7,0.0 +gfx950,256,2836,8192,512,ck,0,0,34.3899,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.78,1515.31,0.0 +gfx950,256,2786,8192,512,ck,0,0,34.3918,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.54,1490.66,0.0 +gfx950,256,2802,8192,512,ck,0,0,34.3968,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.34,1498.3,0.0 +gfx950,256,2915,8192,512,ck,0,0,34.4032,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.77,1553.52,0.0 +gfx950,256,2837,8192,512,ck,0,0,34.4111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.59,1514.87,0.0 +gfx950,256,2741,8192,512,ck,0,0,34.4159,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,668.1,1467.53,0.0 +gfx950,256,2755,8192,512,ck,0,0,34.4185,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,671.46,1474.29,0.0 +gfx950,256,2788,8192,512,ck,0,0,34.4216,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.44,1490.35,0.0 +gfx950,256,2790,8192,512,ck,0,0,34.4287,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.79,1491.03,0.0 +gfx950,256,2753,8192,512,ck,0,0,34.4314,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,670.72,1472.75,0.0 +gfx950,256,2875,8192,512,ck,0,0,34.4352,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.37,1532.45,0.0 +gfx950,256,2873,8192,512,ck,0,0,34.4372,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.84,1531.38,0.0 +gfx950,256,2814,8192,512,ck,0,0,34.441,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.39,1502.27,0.0 +gfx950,256,2828,8192,512,ck,0,0,34.4411,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.8,1509.13,0.0 +gfx950,256,2831,8192,512,ck,0,0,34.4417,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.52,1510.58,0.0 +gfx950,256,1490,6144,1536,ck,0,0,34.4454,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.45,871.96,0.0 +gfx950,256,2772,8192,512,ck,0,0,34.4474,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.04,1481.39,0.0 +gfx950,256,2752,8192,512,ck,0,0,34.4497,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,670.12,1471.48,0.0 +gfx950,256,2806,8192,512,ck,0,0,34.4501,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.26,1497.95,0.0 +gfx950,256,2739,8192,512,ck,0,0,34.4534,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,666.88,1464.95,0.0 +gfx950,256,2880,8192,512,ck,0,0,34.4704,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.87,1533.34,0.0 +gfx950,256,2871,8192,512,ck,0,0,34.4935,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.21,1527.9,0.0 +gfx950,256,2883,8192,512,ck,0,0,34.4963,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.07,1533.66,0.0 +gfx950,256,2827,8192,512,ck,0,0,34.4975,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.43,1506.18,0.0 +gfx950,256,2780,8192,512,ck,0,0,34.5123,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.71,1482.52,0.0 +gfx950,256,2900,8192,512,ck,0,0,34.5284,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.55,1540.55,0.0 +gfx950,256,2830,8192,512,ck,0,0,34.5289,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.53,1506.27,0.0 +gfx950,256,2844,8192,512,ck,0,0,34.5322,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.87,1512.98,0.0 +gfx950,256,2759,8192,512,ck,0,0,34.534,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,670.19,1471.31,0.0 +gfx950,256,2822,8192,512,ck,0,0,34.5393,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.38,1501.91,0.0 +gfx950,256,2939,8192,512,ck,0,0,34.5464,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.65,1558.82,0.0 +gfx950,256,1040,8192,1536,ck,18,0,34.5473,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,757.58,903.68,0.0 +gfx950,256,2932,8192,512,ck,0,0,34.5486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.91,1555.3,0.0 +gfx950,256,2870,8192,512,ck,0,0,34.5499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.83,1524.92,0.0 +gfx950,256,2902,8192,512,ck,0,0,34.5514,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.57,1540.5,0.0 +gfx950,256,2906,8192,512,ck,0,0,34.5541,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.48,1542.34,0.0 +gfx950,256,2852,8192,512,ck,0,0,34.5581,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.29,1515.76,0.0 +gfx950,256,2862,8192,512,ck,0,0,34.5634,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.61,1520.41,0.0 +gfx950,256,2930,8192,512,ck,0,0,34.5681,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.02,1553.44,0.0 +gfx950,256,2839,8192,512,ck,0,0,34.574,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.82,1508.71,0.0 +gfx950,256,1047,8192,1536,ck,0,0,34.5743,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.09,906.6,0.0 +gfx950,256,2841,8192,512,ck,0,0,34.5865,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.06,1509.14,0.0 +gfx950,256,2876,8192,512,ck,0,0,34.5898,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.48,1526.09,0.0 +gfx950,256,2927,8192,512,ck,0,0,34.5925,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.79,1550.88,0.0 +gfx950,256,2779,8192,512,ck,0,0,34.6005,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,673.75,1478.25,0.0 +gfx950,256,2914,8192,512,ck,0,0,34.6196,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.09,1543.32,0.0 +gfx950,256,2863,8192,512,ck,0,0,34.6308,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.5,1517.94,0.0 +gfx950,256,2854,8192,512,ck,0,0,34.6369,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.2,1513.28,0.0 +gfx950,256,2937,8192,512,ck,0,0,34.6377,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.29,1553.74,0.0 +gfx950,256,2840,8192,512,ck,0,0,34.6402,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.75,1506.31,0.0 +gfx950,256,2909,8192,512,ck,0,0,34.6474,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.31,1539.65,0.0 +gfx950,256,2861,8192,512,ck,0,0,34.6476,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.68,1516.23,0.0 +gfx950,256,2910,8192,512,ck,0,0,34.6494,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.51,1540.05,0.0 +gfx950,256,2846,8192,512,ck,0,0,34.6514,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.98,1508.75,0.0 +gfx950,256,2942,8192,512,ck,0,0,34.6525,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,712.19,1555.51,0.0 +gfx950,256,2886,8192,512,ck,0,0,34.6579,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.53,1527.97,0.0 +gfx950,256,1485,6144,1536,ck,0,0,34.6596,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.68,864.58,0.0 +gfx950,256,2872,8192,512,ck,0,0,34.6609,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.08,1521.01,0.0 +gfx950,256,2925,8192,512,ck,0,0,34.6998,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.11,1545.11,0.0 +gfx950,256,2819,8192,512,ck,0,0,34.7026,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,681.43,1493.38,0.0 +gfx950,256,2867,8192,512,ck,0,0,34.7165,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.76,1516.14,0.0 +gfx950,256,2817,8192,512,ck,0,0,34.7444,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,680.13,1490.61,0.0 +gfx950,256,1087,8192,1536,ck,0,0,34.7526,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.14,922.58,0.0 +gfx950,256,2922,8192,512,ck,0,0,34.763,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.1,1540.85,0.0 +gfx950,256,2904,8192,512,ck,0,0,34.7667,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.69,1531.93,0.0 +gfx950,256,2926,8192,512,ck,0,0,34.7775,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.77,1542.15,0.0 +gfx950,256,2933,8192,512,ck,0,0,34.7906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.2,1544.97,0.0 +gfx950,256,2878,8192,512,ck,0,0,34.7948,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.85,1518.07,0.0 +gfx950,256,2923,8192,512,ck,0,0,34.8031,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.53,1539.56,0.0 +gfx950,256,400,7168,4608,ck,0,0,34.8109,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.08,1166.52,0.0 +gfx950,256,104,9216,7168,ck,18,0,34.8123,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,394.7,1974.09,0.0 +gfx950,256,2782,8192,512,ck,0,0,34.8142,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,670.33,1470.63,0.0 +gfx950,256,2879,8192,512,ck,0,0,34.8189,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.61,1517.51,0.0 +gfx950,256,1601,6144,1536,ck,0,0,34.8263,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,867.67,906.48,0.0 +gfx950,256,2917,8192,512,ck,0,0,34.8349,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.44,1535.24,0.0 +gfx950,256,3001,8192,512,ck,0,0,34.8365,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,722.64,1575.91,0.0 +gfx950,256,1112,8192,1536,ck,0,0,34.8502,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.99,932.85,0.0 +gfx950,256,2934,8192,512,ck,0,0,34.8539,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.15,1542.64,0.0 +gfx950,256,1580,6144,1536,ck,0,0,34.8557,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,855.57,897.39,0.0 +gfx950,256,2882,8192,512,ck,0,0,34.8563,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.59,1517.33,0.0 +gfx950,256,2896,8192,512,ck,0,0,34.8593,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.9,1523.99,0.0 +gfx950,256,2997,8192,512,ck,0,0,34.9056,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,720.25,1570.85,0.0 +gfx950,256,3072,8192,512,ck,0,0,34.961,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.1,1604.61,0.0 +gfx950,256,2920,8192,512,ck,0,0,34.9635,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.58,1531.04,0.0 +gfx950,256,1623,6144,1536,ck,0,0,34.9648,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,876.11,911.59,0.0 +gfx950,256,3043,8192,512,ck,0,0,34.9701,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.95,1590.18,0.0 +gfx950,256,2969,8192,512,ck,0,0,34.9806,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.99,1553.96,0.0 +gfx950,256,2958,8192,512,ck,0,0,34.9821,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.32,1548.58,0.0 +gfx950,256,136,9216,7168,ck,18,0,35.0048,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,513.31,1986.64,0.0 +gfx950,256,3042,8192,512,ck,0,0,35.0113,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.85,1587.83,0.0 +gfx950,256,3055,8192,512,ck,0,0,35.0193,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,731.8,1593.74,0.0 +gfx950,256,2978,8192,512,ck,0,0,35.0215,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.31,1556.49,0.0 +gfx950,256,3047,8192,512,ck,0,0,35.0284,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.7,1589.47,0.0 +gfx950,256,3041,8192,512,ck,0,0,35.029,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.25,1586.54,0.0 +gfx950,256,3006,8192,512,ck,0,0,35.0486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.46,1568.78,0.0 +gfx950,256,2907,8192,512,ck,0,0,35.051,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.72,1520.95,0.0 +gfx950,256,1517,6144,1536,ck,18,0,35.0534,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,816.82,867.48,0.0 +gfx950,256,3062,8192,512,ck,0,0,35.0561,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,732.71,1595.44,0.0 +gfx950,256,2968,8192,512,ck,0,0,35.0751,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.83,1549.29,0.0 +gfx950,256,2974,8192,512,ck,0,0,35.0758,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.25,1552.15,0.0 +gfx950,256,3019,8192,512,ck,0,0,35.0786,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,721.96,1573.7,0.0 +gfx950,256,3056,8192,512,ck,0,0,35.0793,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.79,1591.49,0.0 +gfx950,256,3044,8192,512,ck,0,0,35.0955,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.58,1584.98,0.0 +gfx950,256,1132,8192,1536,ck,0,0,35.0998,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,811.62,936.43,0.0 +gfx950,256,1185,8192,1536,ck,0,0,35.1038,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,849.52,963.37,0.0 +gfx950,256,2948,8192,512,ck,0,0,35.1132,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.28,1537.99,0.0 +gfx950,256,2973,8192,512,ck,0,0,35.1141,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.24,1549.98,0.0 +gfx950,256,1144,8192,1536,ck,0,0,35.1194,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.77,942.03,0.0 +gfx950,256,432,7168,4608,ck,0,0,35.121,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.56,1173.48,0.0 +gfx950,256,2984,8192,512,ck,0,0,35.1262,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,712.62,1554.74,0.0 +gfx950,256,2972,8192,512,ck,0,0,35.1318,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.64,1548.72,0.0 +gfx950,256,2994,8192,512,ck,0,0,35.1415,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.7,1558.87,0.0 +gfx950,256,3070,8192,512,ck,0,0,35.1468,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,732.73,1595.17,0.0 +gfx950,256,1553,6144,1536,ck,0,0,35.1486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.94,879.29,0.0 +gfx950,256,1639,6144,1536,ck,0,0,35.1509,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,880.07,913.06,0.0 +gfx950,256,3013,8192,512,ck,0,0,35.1514,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.03,1567.56,0.0 +gfx950,256,3020,8192,512,ck,0,0,35.1581,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,720.56,1570.63,0.0 +gfx950,256,2983,8192,512,ck,0,0,35.1684,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.53,1552.39,0.0 +gfx950,256,2971,8192,512,ck,0,0,35.1737,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,708.56,1546.39,0.0 +gfx950,256,2996,8192,512,ck,0,0,35.2058,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.87,1556.98,0.0 +gfx950,256,2999,8192,512,ck,0,0,35.2096,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.51,1558.25,0.0 +gfx950,256,3027,8192,512,ck,0,0,35.2103,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,721.16,1571.66,0.0 +gfx950,256,3063,8192,512,ck,0,0,35.2527,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.86,1587.02,0.0 +gfx950,256,3017,8192,512,ck,0,0,35.2624,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,717.72,1564.54,0.0 +gfx950,256,2982,8192,512,ck,0,0,35.2644,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.35,1547.68,0.0 +gfx950,256,1255,8192,1536,ck,0,0,35.2856,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,895.07,993.96,0.0 +gfx950,256,144,9216,7168,ck,18,0,35.2972,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,539.0,1975.98,0.0 +gfx950,256,2990,8192,512,ck,0,0,35.2978,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.58,1550.05,0.0 +gfx950,256,1548,6144,1536,ck,0,0,35.3037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.61,873.47,0.0 +gfx950,256,3029,8192,512,ck,0,0,35.3078,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.65,1568.27,0.0 +gfx950,256,152,9216,7168,ck,18,0,35.313,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,568.7,1980.9,0.0 +gfx950,256,3007,8192,512,ck,0,0,35.323,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.11,1557.08,0.0 +gfx950,256,2960,8192,512,ck,0,0,35.3258,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.89,1534.47,0.0 +gfx950,256,1696,6144,1536,ck,0,0,35.3344,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,905.94,930.61,0.0 +gfx950,256,3000,8192,512,ck,0,0,35.3554,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.8,1552.3,0.0 +gfx950,256,1718,6144,1536,ck,0,0,35.3642,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,916.92,938.43,0.0 +gfx950,256,1590,6144,1536,ck,0,0,35.3759,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,848.32,888.1,0.0 +gfx950,256,2998,8192,512,ck,0,0,35.3982,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.46,1549.47,0.0 +gfx950,256,3010,8192,512,ck,0,0,35.4258,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,712.75,1553.99,0.0 +gfx950,256,112,9216,7168,ck,18,0,35.4385,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,417.55,1944.99,0.0 +gfx950,256,3071,8192,512,ck,0,0,35.4418,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,726.87,1582.37,0.0 +gfx950,256,3060,8192,512,ck,0,0,35.4622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.85,1576.22,0.0 +gfx950,256,3037,8192,512,ck,0,0,35.4624,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,718.4,1565.25,0.0 +gfx950,256,3030,8192,512,ck,0,0,35.4749,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,716.49,1561.36,0.0 +gfx950,256,2945,8192,512,ck,0,0,35.4765,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.36,1520.81,0.0 +gfx950,256,1265,8192,1536,ck,0,0,35.4858,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,897.11,993.4,0.0 +gfx950,256,3002,8192,512,ck,0,0,35.4874,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.62,1547.48,0.0 +gfx950,256,3048,8192,512,ck,0,0,35.4968,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,720.3,1568.97,0.0 +gfx950,256,3038,8192,512,ck,0,0,35.5116,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,717.64,1563.56,0.0 +gfx950,256,1599,6144,1536,ck,0,0,35.5136,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,849.82,888.16,0.0 +gfx950,256,1685,6144,1536,ck,0,0,35.5311,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,895.08,921.18,0.0 +gfx950,256,1710,6144,1536,ck,0,0,35.5332,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,908.31,930.85,0.0 +gfx950,256,3022,8192,512,ck,0,0,35.6086,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.92,1551.7,0.0 +gfx950,256,464,7168,4608,ck,0,0,35.7385,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,857.67,1170.17,0.0 +gfx950,256,3021,8192,512,ck,0,0,35.7398,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.07,1545.54,0.0 +gfx950,256,1813,6144,1536,ck,0,0,35.9075,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,952.98,960.8,0.0 +gfx950,256,1816,6144,1536,ck,0,0,35.9326,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,953.89,961.29,0.0 +gfx950,256,1808,6144,1536,ck,0,0,36.0777,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,945.87,954.36,0.0 +gfx950,256,168,9216,7168,ck,18,0,36.0793,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,615.21,1950.18,0.0 +gfx950,256,2964,8192,512,ck,0,0,36.0847,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.04,1504.07,0.0 +gfx950,256,1334,8192,1536,ck,0,0,36.1817,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,927.85,1008.47,0.0 +gfx950,256,1810,6144,1536,ck,0,0,36.3548,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,939.7,947.84,0.0 +gfx950,256,176,9216,7168,ck,18,0,36.373,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,639.3,1940.06,0.0 +gfx950,256,496,7168,4608,ck,0,0,36.412,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,899.87,1165.18,0.0 +gfx950,256,1821,6144,1536,ck,0,0,36.4393,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,943.22,949.82,0.0 +gfx950,256,1865,6144,1536,ck,0,0,36.4996,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,964.41,964.91,0.0 +gfx950,256,1333,8192,1536,ck,0,0,36.5445,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,917.95,997.97,0.0 +gfx950,256,1809,6144,1536,ck,0,0,36.6501,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,931.61,939.83,0.0 +gfx950,256,1817,6144,1536,ck,0,0,36.69,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,934.72,941.82,0.0 +gfx950,256,184,9216,7168,ck,18,0,36.7408,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,661.67,1926.22,0.0 +gfx950,256,1867,6144,1536,ck,0,0,36.7481,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,958.92,959.14,0.0 +gfx950,256,1490,8192,1536,ck,0,0,36.9146,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1015.78,1064.18,0.0 +gfx950,256,120,9216,7168,ck,18,0,37.1231,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,427.08,1862.24,0.0 +gfx950,256,1485,8192,1536,ck,0,0,37.5002,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,996.56,1045.17,0.0 +gfx950,256,1517,8192,1536,ck,0,0,37.7214,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1012.07,1054.24,0.0 +gfx950,256,1922,6144,1536,ck,0,0,37.8157,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,959.3,952.17,0.0 +gfx950,256,1934,6144,1536,ck,0,0,37.8797,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,963.66,954.94,0.0 +gfx950,256,3092,8192,512,ck,18,0,37.9785,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,682.95,1486.02,0.0 +gfx950,256,2002,6144,1536,ck,0,0,38.0305,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,993.58,975.87,0.0 +gfx950,256,2000,6144,1536,ck,0,0,38.0578,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,991.88,974.44,0.0 +gfx950,256,2003,6144,1536,ck,0,0,38.1307,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,991.47,973.67,0.0 +gfx950,256,3117,8192,512,ck,18,0,38.1582,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,685.23,1490.09,0.0 +gfx950,256,3099,8192,512,ck,18,0,38.1634,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,681.18,1481.92,0.0 +gfx950,256,3161,8192,512,ck,18,0,38.2194,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,693.79,1507.16,0.0 +gfx950,256,2041,6144,1536,ck,0,0,38.3041,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1005.7,982.97,0.0 +gfx950,256,2006,6144,1536,ck,0,0,38.3517,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,987.23,969.14,0.0 +gfx950,256,1968,6144,1536,ck,0,0,38.3655,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,968.18,955.1,0.0 +gfx950,256,1978,6144,1536,ck,0,0,38.3837,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,972.64,958.25,0.0 +gfx950,256,2032,6144,1536,ck,0,0,38.4282,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,998.04,976.56,0.0 +gfx950,256,3131,8192,512,ck,18,0,38.4494,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,683.1,1484.96,0.0 +gfx950,256,2040,6144,1536,ck,0,0,38.4495,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1001.41,978.9,0.0 +gfx950,256,3080,8192,512,ck,18,0,38.4498,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,671.96,1462.53,0.0 +gfx950,256,3098,8192,512,ck,0,0,38.4552,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.8,1470.23,0.0 +gfx950,256,1991,6144,1536,ck,0,0,38.4659,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,976.94,960.87,0.0 +gfx950,256,3077,8192,512,ck,18,0,38.4706,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,670.95,1460.42,0.0 +gfx950,256,3091,8192,512,ck,18,0,38.4902,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,673.66,1465.82,0.0 +gfx950,256,3075,8192,512,ck,18,0,38.507,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,669.88,1458.16,0.0 +gfx950,256,1944,6144,1536,ck,0,0,38.5543,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,951.69,941.82,0.0 +gfx950,256,2048,6144,1536,ck,0,0,38.5623,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1002.4,978.9,0.0 +gfx950,256,3095,8192,512,ck,0,0,38.57,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,673.13,1464.54,0.0 +gfx950,256,1987,6144,1536,ck,0,0,38.5756,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,972.2,956.71,0.0 +gfx950,256,3180,8192,512,ck,18,0,38.5828,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,691.39,1501.28,0.0 +gfx950,256,2011,6144,1536,ck,0,0,38.5831,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,983.76,965.12,0.0 +gfx950,256,3090,8192,512,ck,0,0,38.5839,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,671.8,1461.83,0.0 +gfx950,256,3113,8192,512,ck,18,0,38.5994,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,676.53,1471.31,0.0 +gfx950,256,3118,8192,512,ck,18,0,38.6002,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,677.6,1473.46,0.0 +gfx950,256,2046,6144,1536,ck,0,0,38.6245,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,999.8,976.61,0.0 +gfx950,256,3177,8192,512,ck,18,0,38.6462,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,689.6,1497.51,0.0 +gfx950,256,3200,8192,512,ck,0,0,38.6487,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.55,1507.46,0.0 +gfx950,256,3191,8192,512,ck,18,0,38.6598,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,692.4,1503.1,0.0 +gfx950,256,3144,8192,512,ck,18,0,38.6658,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,682.1,1482.33,0.0 +gfx950,256,3221,8192,512,ck,0,0,38.6697,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.73,1515.82,0.0 +gfx950,256,3101,8192,512,ck,18,0,38.6706,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,672.68,1463.35,0.0 +gfx950,256,3102,8192,512,ck,18,0,38.6898,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,672.57,1463.07,0.0 +gfx950,256,3137,8192,512,ck,18,0,38.7006,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,679.97,1477.94,0.0 +gfx950,256,3167,8192,512,ck,0,0,38.7155,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.2,1490.46,0.0 +gfx950,256,2009,6144,1536,ck,0,0,38.7554,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,978.41,960.11,0.0 +gfx950,256,3182,8192,512,ck,18,0,38.7606,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,688.65,1495.27,0.0 +gfx950,256,3185,8192,512,ck,0,0,38.7648,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.23,1496.41,0.0 +gfx950,256,3074,8192,512,ck,0,0,38.7715,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,665.09,1447.78,0.0 +gfx950,256,3194,8192,512,ck,18,0,38.791,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,690.71,1499.32,0.0 +gfx950,256,2012,6144,1536,ck,0,0,38.8214,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,978.2,959.55,0.0 +gfx950,256,2004,6144,1536,ck,0,0,38.8219,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,974.3,956.69,0.0 +gfx950,256,3157,8192,512,ck,0,0,38.832,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,681.98,1481.64,0.0 +gfx950,256,3135,8192,512,ck,18,0,38.8326,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,677.22,1472.04,0.0 +gfx950,256,3183,8192,512,ck,18,0,38.8457,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,687.36,1492.42,0.0 +gfx950,256,3198,8192,512,ck,0,0,38.85,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.52,1498.78,0.0 +gfx950,256,3085,8192,512,ck,0,0,38.8579,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,665.99,1449.34,0.0 +gfx950,256,3197,8192,512,ck,0,0,38.8591,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.14,1498.0,0.0 +gfx950,256,3148,8192,512,ck,0,0,38.8785,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.23,1475.95,0.0 +gfx950,256,3176,8192,512,ck,0,0,38.8814,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.22,1488.01,0.0 +gfx950,256,3173,8192,512,ck,0,0,38.886,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.49,1486.53,0.0 +gfx950,256,3107,8192,512,ck,0,0,38.8867,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,670.24,1457.83,0.0 +gfx950,256,3136,8192,512,ck,0,0,38.8875,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,676.48,1470.4,0.0 +gfx950,256,2037,6144,1536,ck,0,0,38.8935,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,988.52,966.66,0.0 +gfx950,256,3140,8192,512,ck,18,0,38.8954,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,677.21,1471.84,0.0 +gfx950,256,3181,8192,512,ck,0,0,38.8974,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.01,1489.57,0.0 +gfx950,256,3175,8192,512,ck,0,0,38.902,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.64,1486.79,0.0 +gfx950,256,3134,8192,512,ck,0,0,38.9129,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.61,1468.57,0.0 +gfx950,256,3106,8192,512,ck,0,0,38.9144,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,669.55,1456.36,0.0 +gfx950,256,3287,8192,512,ck,0,0,38.9165,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,708.53,1534.86,0.0 +gfx950,256,3192,8192,512,ck,0,0,38.9182,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.02,1493.55,0.0 +gfx950,256,3328,8192,512,ck,0,0,38.9306,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,717.1,1552.1,0.0 +gfx950,256,1980,6144,1536,ck,0,0,38.9355,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,959.82,945.38,0.0 +gfx950,256,3189,8192,512,ck,18,0,38.9462,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,686.88,1491.18,0.0 +gfx950,256,3088,8192,512,ck,18,0,38.9466,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,665.12,1447.34,0.0 +gfx950,256,3129,8192,512,ck,18,0,38.9598,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,673.72,1464.64,0.0 +gfx950,256,3163,8192,512,ck,0,0,38.9653,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,680.94,1479.17,0.0 +gfx950,256,3188,8192,512,ck,0,0,38.9699,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.24,1489.84,0.0 +gfx950,256,3209,8192,512,ck,0,0,38.9712,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.74,1498.89,0.0 +gfx950,256,3155,8192,512,ck,0,0,38.9757,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.04,1475.31,0.0 +gfx950,256,3186,8192,512,ck,0,0,39.0044,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.21,1487.65,0.0 +gfx950,256,2035,6144,1536,ck,0,0,39.0055,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,984.72,963.17,0.0 +gfx950,256,3142,8192,512,ck,0,0,39.0062,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,675.71,1468.52,0.0 +gfx950,256,3217,8192,512,ck,0,0,39.0183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.63,1500.55,0.0 +gfx950,256,3286,8192,512,ck,0,0,39.0382,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.1,1529.64,0.0 +gfx950,256,3159,8192,512,ck,0,0,39.0406,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,678.77,1474.59,0.0 +gfx950,256,3187,8192,512,ck,0,0,39.043,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.74,1486.61,0.0 +gfx950,256,3253,8192,512,ck,0,0,39.0571,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.67,1514.63,0.0 +gfx950,256,3190,8192,512,ck,0,0,39.0703,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.91,1486.87,0.0 +gfx950,256,3250,8192,512,ck,0,0,39.0761,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.69,1512.59,0.0 +gfx950,256,3396,8192,512,ck,0,0,39.086,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.85,1575.32,0.0 +gfx950,256,3208,8192,512,ck,0,0,39.0958,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.33,1493.68,0.0 +gfx950,256,3222,8192,512,ck,0,0,39.1291,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.74,1498.46,0.0 +gfx950,256,3178,8192,512,ck,0,0,39.1446,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,681.04,1478.87,0.0 +gfx950,256,3251,8192,512,ck,0,0,39.1557,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.49,1509.95,0.0 +gfx950,256,3239,8192,512,ck,0,0,39.1565,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.9,1504.74,0.0 +gfx950,256,3310,8192,512,ck,0,0,39.1582,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.08,1535.31,0.0 +gfx950,256,3233,8192,512,ck,0,0,39.1606,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.54,1502.0,0.0 +gfx950,256,3229,8192,512,ck,0,0,39.1685,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.55,1499.97,0.0 +gfx950,256,3338,8192,512,ck,0,0,39.1811,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.66,1546.49,0.0 +gfx950,256,3219,8192,512,ck,0,0,39.1928,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.98,1494.73,0.0 +gfx950,256,3170,8192,512,ck,18,0,39.195,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,678.45,1473.52,0.0 +gfx950,256,3321,8192,512,ck,0,0,39.1958,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.75,1538.58,0.0 +gfx950,256,3349,8192,512,ck,0,0,39.2149,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,716.4,1549.9,0.0 +gfx950,256,3206,8192,512,ck,0,0,39.23,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.54,1487.71,0.0 +gfx950,256,3247,8192,512,ck,0,0,39.2303,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.31,1505.36,0.0 +gfx950,256,3260,8192,512,ck,0,0,39.2304,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.08,1510.95,0.0 +gfx950,256,3311,8192,512,ck,0,0,39.2329,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.94,1532.82,0.0 +gfx950,256,3306,8192,512,ck,0,0,39.2381,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.78,1530.46,0.0 +gfx950,256,3280,8192,512,ck,0,0,39.2385,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.22,1519.25,0.0 +gfx950,256,3282,8192,512,ck,0,0,39.2734,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.02,1518.76,0.0 +gfx950,256,3279,8192,512,ck,0,0,39.2741,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.37,1517.45,0.0 +gfx950,256,3223,8192,512,ck,0,0,39.2762,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.37,1493.27,0.0 +gfx950,256,3216,8192,512,ck,0,0,39.2781,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.84,1490.19,0.0 +gfx950,256,3414,8192,512,ck,0,0,39.2822,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.05,1575.2,0.0 +gfx950,256,3255,8192,512,ck,0,0,39.283,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.08,1506.78,0.0 +gfx950,256,3269,8192,512,ck,0,0,39.2831,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.07,1512.8,0.0 +gfx950,256,3243,8192,512,ck,0,0,39.288,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.43,1501.43,0.0 +gfx950,256,3301,8192,512,ck,0,0,39.2918,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.75,1526.22,0.0 +gfx950,256,3225,8192,512,ck,0,0,39.2927,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.51,1493.51,0.0 +gfx950,256,3234,8192,512,ck,0,0,39.2971,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.35,1497.21,0.0 +gfx950,256,3265,8192,512,ck,0,0,39.3046,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.83,1510.25,0.0 +gfx950,256,3288,8192,512,ck,0,0,39.3139,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.58,1519.78,0.0 +gfx950,256,3256,8192,512,ck,0,0,39.3145,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.74,1506.0,0.0 +gfx950,256,3445,8192,512,ck,0,0,39.3182,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.0,1587.08,0.0 +gfx950,256,3327,8192,512,ck,0,0,39.3299,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.61,1535.91,0.0 +gfx950,256,3224,8192,512,ck,0,0,39.3509,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.27,1490.87,0.0 +gfx950,256,3281,8192,512,ck,0,0,39.3582,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.3,1515.06,0.0 +gfx950,256,3325,8192,512,ck,0,0,39.3628,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,708.59,1533.77,0.0 +gfx950,256,3302,8192,512,ck,0,0,39.3658,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.64,1523.78,0.0 +gfx950,256,3263,8192,512,ck,0,0,39.368,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.29,1506.96,0.0 +gfx950,256,3363,8192,512,ck,0,0,39.3688,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,716.58,1549.85,0.0 +gfx950,256,3273,8192,512,ck,0,0,39.3757,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.28,1510.96,0.0 +gfx950,256,3365,8192,512,ck,0,0,39.3765,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,716.87,1550.4,0.0 +gfx950,256,3276,8192,512,ck,0,0,39.3801,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.84,1512.07,0.0 +gfx950,256,3201,8192,512,ck,0,0,39.3892,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,681.71,1479.55,0.0 +gfx950,256,3227,8192,512,ck,0,0,39.3965,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.12,1490.43,0.0 +gfx950,256,3416,8192,512,ck,0,0,39.4131,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.05,1570.82,0.0 +gfx950,256,3399,8192,512,ck,0,0,39.4361,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.01,1562.62,0.0 +gfx950,256,3272,8192,512,ck,0,0,39.4372,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.98,1508.17,0.0 +gfx950,256,3315,8192,512,ck,0,0,39.4411,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.06,1526.44,0.0 +gfx950,256,3430,8192,512,ck,0,0,39.4557,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.25,1575.12,0.0 +gfx950,256,3317,8192,512,ck,0,0,39.4585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.17,1526.63,0.0 +gfx950,256,3350,8192,512,ck,0,0,39.4645,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,712.08,1540.52,0.0 +gfx950,256,3359,8192,512,ck,0,0,39.4675,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.94,1544.26,0.0 +gfx950,256,3486,8192,512,ck,0,0,39.4703,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.88,1598.51,0.0 +gfx950,256,3364,8192,512,ck,0,0,39.4765,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.84,1546.05,0.0 +gfx950,256,3230,8192,512,ck,0,0,39.4835,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.24,1488.43,0.0 +gfx950,256,3404,8192,512,ck,0,0,39.4859,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.17,1562.79,0.0 +gfx950,256,3266,8192,512,ck,0,0,39.4887,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.8,1503.64,0.0 +gfx950,256,3380,8192,512,ck,0,0,39.4926,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,717.94,1552.26,0.0 +gfx950,256,3455,8192,512,ck,0,0,39.5048,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,733.65,1583.86,0.0 +gfx950,256,3296,8192,512,ck,0,0,39.5113,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.77,1515.6,0.0 +gfx950,256,3475,8192,512,ck,0,0,39.5335,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.36,1591.26,0.0 +gfx950,256,3295,8192,512,ck,0,0,39.5373,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.1,1514.18,0.0 +gfx950,256,3393,8192,512,ck,0,0,39.5433,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.78,1555.82,0.0 +gfx950,256,3356,8192,512,ck,0,0,39.5465,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.88,1539.89,0.0 +gfx950,256,3419,8192,512,ck,0,0,39.5553,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.08,1566.46,0.0 +gfx950,256,3245,8192,512,ck,0,0,39.5589,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.11,1492.0,0.0 +gfx950,256,3343,8192,512,ck,0,0,39.5627,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,708.83,1533.71,0.0 +gfx950,256,3524,8192,512,ck,0,0,39.5691,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.08,1610.75,0.0 +gfx950,256,3309,8192,512,ck,0,0,39.5932,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.08,1518.02,0.0 +gfx950,256,3354,8192,512,ck,0,0,39.6071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.36,1536.68,0.0 +gfx950,256,3435,8192,512,ck,0,0,39.6227,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.23,1570.62,0.0 +gfx950,256,3372,8192,512,ck,0,0,39.6292,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.78,1543.5,0.0 +gfx950,256,3444,8192,512,ck,0,0,39.6305,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.99,1574.14,0.0 +gfx950,256,3304,8192,512,ck,0,0,39.639,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.21,1514.13,0.0 +gfx950,256,3423,8192,512,ck,0,0,39.6398,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,724.38,1564.82,0.0 +gfx950,256,3450,8192,512,ck,0,0,39.6407,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.08,1576.3,0.0 +gfx950,256,3484,8192,512,ck,0,0,39.6423,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.24,1590.72,0.0 +gfx950,256,3376,8192,512,ck,0,0,39.649,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.27,1544.43,0.0 +gfx950,256,3527,8192,512,ck,0,0,39.6581,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,746.04,1608.41,0.0 +gfx950,256,3318,8192,512,ck,0,0,39.661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.78,1519.26,0.0 +gfx950,256,3494,8192,512,ck,0,0,39.6641,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,738.95,1594.11,0.0 +gfx950,256,3529,8192,512,ck,0,0,39.6664,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,746.31,1608.93,0.0 +gfx950,256,3441,8192,512,ck,0,0,39.669,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.65,1571.34,0.0 +gfx950,256,3308,8192,512,ck,0,0,39.6806,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.32,1514.25,0.0 +gfx950,256,3368,8192,512,ck,0,0,39.691,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.82,1539.39,0.0 +gfx950,256,3392,8192,512,ck,0,0,39.6962,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,716.8,1549.41,0.0 +gfx950,256,3320,8192,512,ck,0,0,39.7117,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.31,1518.17,0.0 +gfx950,256,3232,8192,512,ck,0,0,39.7231,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,682.52,1480.3,0.0 +gfx950,256,3452,8192,512,ck,0,0,39.7246,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.96,1573.82,0.0 +gfx950,256,3522,8192,512,ck,0,0,39.7283,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.67,1603.44,0.0 +gfx950,256,3458,8192,512,ck,0,0,39.7333,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.06,1576.02,0.0 +gfx950,256,3409,8192,512,ck,0,0,39.7372,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.65,1555.04,0.0 +gfx950,256,3427,8192,512,ck,0,0,39.7492,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.23,1562.22,0.0 +gfx950,256,3542,8192,512,ck,0,0,39.7508,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.47,1611.04,0.0 +gfx950,256,3492,8192,512,ck,0,0,39.766,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.63,1589.18,0.0 +gfx950,256,3518,8192,512,ck,0,0,39.7855,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.76,1599.44,0.0 +gfx950,256,3554,8192,512,ck,0,0,39.8098,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.89,1613.74,0.0 +gfx950,256,3322,8192,512,ck,0,0,39.8244,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.75,1514.72,0.0 +gfx950,256,3388,8192,512,ck,0,0,39.8262,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.62,1542.65,0.0 +gfx950,256,3335,8192,512,ck,0,0,39.8442,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.14,1519.48,0.0 +gfx950,256,3517,8192,512,ck,0,0,39.8663,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.04,1595.77,0.0 +gfx950,256,3421,8192,512,ck,0,0,39.874,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.7,1554.79,0.0 +gfx950,256,3474,8192,512,ck,0,0,39.8806,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.73,1576.98,0.0 +gfx950,256,3555,8192,512,ck,0,0,39.8834,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.72,1611.19,0.0 +gfx950,256,3398,8192,512,ck,0,0,39.8843,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.68,1544.64,0.0 +gfx950,256,3454,8192,512,ck,0,0,39.8902,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,726.35,1568.13,0.0 +gfx950,256,3538,8192,512,ck,0,0,39.8903,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.01,1603.71,0.0 +gfx950,256,3464,8192,512,ck,0,0,39.9083,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.12,1571.65,0.0 +gfx950,256,3488,8192,512,ck,0,0,39.9149,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,733.05,1581.55,0.0 +gfx950,256,3495,8192,512,ck,0,0,39.928,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,734.28,1584.0,0.0 +gfx950,256,3533,8192,512,ck,0,0,39.9315,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,742.19,1599.94,0.0 +gfx950,256,3552,8192,512,ck,0,0,39.9408,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,746.01,1607.6,0.0 +gfx950,256,3463,8192,512,ck,0,0,39.9425,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.29,1569.89,0.0 +gfx950,256,3433,8192,512,ck,0,0,39.9429,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,720.98,1557.18,0.0 +gfx950,256,3437,8192,512,ck,0,0,39.9466,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,721.75,1558.73,0.0 +gfx950,256,3424,8192,512,ck,0,0,39.9476,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.01,1553.19,0.0 +gfx950,256,3513,8192,512,ck,0,0,39.9898,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.92,1589.15,0.0 +gfx950,256,3516,8192,512,ck,0,0,39.9908,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.53,1590.38,0.0 +gfx950,256,3461,8192,512,ck,0,0,40.0041,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.75,1566.62,0.0 +gfx950,256,3571,8192,512,ck,0,0,40.0062,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.78,1613.0,0.0 +gfx950,256,3511,8192,512,ck,0,0,40.0102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.12,1587.5,0.0 +gfx950,256,3512,8192,512,ck,0,0,40.0326,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.92,1587.03,0.0 +gfx950,256,3520,8192,512,ck,0,0,40.0402,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.46,1590.11,0.0 +gfx950,256,3519,8192,512,ck,0,0,40.0411,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.23,1589.65,0.0 +gfx950,256,3545,8192,512,ck,0,0,40.093,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.72,1598.55,0.0 +gfx950,256,3390,8192,512,ck,0,0,40.101,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.14,1532.92,0.0 +gfx950,256,3582,8192,512,ck,0,0,40.1028,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.27,1613.75,0.0 +gfx950,256,3371,8192,512,ck,0,0,40.1234,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.78,1524.07,0.0 +gfx950,256,3357,8192,512,ck,0,0,40.1338,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.67,1517.78,0.0 +gfx950,256,3570,8192,512,ck,0,0,40.175,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.42,1605.8,0.0 +gfx950,256,3530,8192,512,ck,0,0,40.203,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.56,1587.87,0.0 +gfx950,256,3470,8192,512,ck,0,0,40.2046,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,724.01,1562.59,0.0 +gfx950,256,3340,8192,512,ck,0,0,40.2122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.75,1507.68,0.0 +gfx950,256,3561,8192,512,ck,0,0,40.2155,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,742.79,1600.4,0.0 +gfx950,256,3576,8192,512,ck,0,0,40.2168,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.9,1606.65,0.0 +gfx950,256,3541,8192,512,ck,0,0,40.2222,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,738.5,1591.73,0.0 +gfx950,256,3528,8192,512,ck,0,0,40.237,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.52,1585.69,0.0 +gfx950,256,3581,8192,512,ck,0,0,40.2378,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,746.55,1607.91,0.0 +gfx950,256,3428,8192,512,ck,0,0,40.2426,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.57,1543.48,0.0 +gfx950,256,3535,8192,512,ck,0,0,40.2479,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.78,1588.2,0.0 +gfx950,256,3468,8192,512,ck,0,0,40.2704,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,722.41,1559.2,0.0 +gfx950,256,3537,8192,512,ck,0,0,40.2718,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.76,1588.1,0.0 +gfx950,256,3572,8192,512,ck,0,0,40.2844,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.81,1602.28,0.0 +gfx950,256,3412,8192,512,ck,0,0,40.3302,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.69,1533.43,0.0 +gfx950,256,3482,8192,512,ck,0,0,40.3352,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,724.16,1562.56,0.0 +gfx950,256,3564,8192,512,ck,0,0,40.3727,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.53,1595.43,0.0 +gfx950,256,3515,8192,512,ck,0,0,40.3986,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.88,1573.91,0.0 +gfx950,256,3567,8192,512,ck,0,0,40.4268,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.16,1594.54,0.0 +gfx950,256,3580,8192,512,ck,0,0,40.4642,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,742.17,1598.5,0.0 +gfx950,256,3491,8192,512,ck,0,0,40.493,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.2,1560.23,0.0 +gfx950,256,3360,8192,512,ck,0,0,40.5552,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.0,1503.26,0.0 +gfx950,256,3579,8192,512,ck,0,0,40.584,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.77,1593.36,0.0 +gfx950,256,3574,8192,512,ck,0,0,40.6118,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,738.23,1590.19,0.0 +gfx950,256,3583,8192,512,ck,0,0,40.7838,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.97,1587.22,0.0 +gfx950,256,3575,8192,512,ck,0,0,41.1106,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.48,1571.31,0.0 +gfx950,256,200,9216,7168,ck,12,0,43.1747,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,612.03,1648.66,0.0 +gfx950,256,3712,8192,512,ck,0,0,43.298,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.17,1545.39,0.0 +gfx950,256,2112,6144,1536,ck,0,0,43.521,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,915.94,887.7,0.0 +gfx950,256,2146,6144,1536,ck,0,0,43.5582,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,929.89,897.73,0.0 +gfx950,256,3586,8192,512,ck,0,0,43.5905,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.09,1486.18,0.0 +gfx950,256,3615,8192,512,ck,0,0,43.6173,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.25,1496.5,0.0 +gfx950,256,3658,8192,512,ck,0,0,43.6229,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.43,1512.96,0.0 +gfx950,256,3589,8192,512,ck,0,0,43.6233,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.15,1486.23,0.0 +gfx950,256,2101,6144,1536,ck,0,0,43.6254,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,908.99,882.09,0.0 +gfx950,256,2087,6144,1536,ck,0,0,43.6373,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,902.69,877.41,0.0 +gfx950,256,3618,8192,512,ck,0,0,43.6432,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.41,1496.77,0.0 +gfx950,256,3608,8192,512,ck,0,0,43.6491,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.4,1492.7,0.0 +gfx950,256,3591,8192,512,ck,0,0,43.65,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.11,1486.09,0.0 +gfx950,256,3631,8192,512,ck,0,0,43.651,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.79,1501.54,0.0 +gfx950,256,2110,6144,1536,ck,0,0,43.6749,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,911.85,883.94,0.0 +gfx950,256,3650,8192,512,ck,0,0,43.6774,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.01,1507.98,0.0 +gfx950,256,3616,8192,512,ck,0,0,43.6804,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.44,1494.73,0.0 +gfx950,256,3610,8192,512,ck,0,0,43.6874,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.17,1492.17,0.0 +gfx950,256,3678,8192,512,ck,0,0,43.6942,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.12,1518.23,0.0 +gfx950,256,3659,8192,512,ck,0,0,43.695,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.46,1510.85,0.0 +gfx950,256,1599,8192,1536,ck,0,0,43.6993,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,920.84,943.65,0.0 +gfx950,256,3683,8192,512,ck,0,0,43.7012,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.97,1519.92,0.0 +gfx950,256,2054,6144,1536,ck,0,0,43.7036,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,887.07,865.64,0.0 +gfx950,256,2086,6144,1536,ck,0,0,43.7353,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,900.23,875.13,0.0 +gfx950,256,3673,8192,512,ck,0,0,43.7701,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.94,1513.67,0.0 +gfx950,256,3682,8192,512,ck,0,0,43.7722,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.63,1517.07,0.0 +gfx950,256,2161,6144,1536,ck,0,0,43.7942,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,931.35,897.63,0.0 +gfx950,256,3711,8192,512,ck,0,0,43.8037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.67,1527.16,0.0 +gfx950,256,2128,6144,1536,ck,0,0,43.8071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,916.85,886.95,0.0 +gfx950,256,3676,8192,512,ck,0,0,43.8148,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.79,1513.28,0.0 +gfx950,256,3647,8192,512,ck,0,0,43.8242,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.09,1501.77,0.0 +gfx950,256,2111,6144,1536,ck,0,0,43.8414,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,908.82,880.89,0.0 +gfx950,256,3637,8192,512,ck,0,0,43.8478,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.8,1497.11,0.0 +gfx950,256,3708,8192,512,ck,0,0,43.8673,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.07,1523.79,0.0 +gfx950,256,3612,8192,512,ck,18,0,43.877,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,690.56,1486.49,0.0 +gfx950,256,1580,8192,1536,ck,0,0,43.8852,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,906.05,931.9,0.0 +gfx950,256,2127,6144,1536,ck,0,0,43.8872,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,914.75,885.02,0.0 +gfx950,256,3685,8192,512,ck,0,0,43.8892,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.32,1514.18,0.0 +gfx950,256,3654,8192,512,ck,0,0,43.8995,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.23,1501.89,0.0 +gfx950,256,2085,6144,1536,ck,0,0,43.9074,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,896.27,871.38,0.0 +gfx950,256,2164,6144,1536,ck,0,0,43.912,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,930.14,896.16,0.0 +gfx950,256,2090,6144,1536,ck,0,0,43.9149,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,898.27,872.81,0.0 +gfx950,256,3653,8192,512,ck,0,0,43.9187,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.73,1500.85,0.0 +gfx950,256,3684,8192,512,ck,0,0,43.9333,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.42,1512.27,0.0 +gfx950,256,3644,8192,512,ck,0,0,43.9411,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.66,1496.62,0.0 +gfx950,256,3627,8192,512,ck,0,0,43.953,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.23,1489.68,0.0 +gfx950,256,2088,6144,1536,ck,0,0,43.9544,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,896.6,871.4,0.0 +gfx950,256,3599,8192,512,ck,0,0,43.9616,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.75,1478.63,0.0 +gfx950,256,3670,8192,512,ck,0,0,43.9666,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.22,1505.75,0.0 +gfx950,256,3704,8192,512,ck,0,0,43.9668,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.7,1518.81,0.0 +gfx950,256,3666,8192,512,ck,0,0,43.9684,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.43,1504.15,0.0 +gfx950,256,3669,8192,512,ck,0,0,43.9723,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.94,1505.17,0.0 +gfx950,256,2126,6144,1536,ck,0,0,43.9786,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,912.42,882.86,0.0 +gfx950,256,3710,8192,512,ck,0,0,43.984,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.57,1520.52,0.0 +gfx950,256,3587,8192,512,ck,0,0,43.9926,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.98,1472.98,0.0 +gfx950,256,3667,8192,512,ck,0,0,43.9952,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.19,1503.62,0.0 +gfx950,256,3640,8192,512,ck,0,0,43.9954,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.04,1493.24,0.0 +gfx950,256,3623,8192,512,ck,18,0,44.0035,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,690.67,1486.44,0.0 +gfx950,256,1548,8192,1536,ck,0,0,44.004,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,885.3,916.35,0.0 +gfx950,256,2149,6144,1536,ck,0,0,44.0066,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,921.7,889.52,0.0 +gfx950,256,3624,8192,512,ck,0,0,44.0094,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.77,1486.62,0.0 +gfx950,256,2166,6144,1536,ck,0,0,44.0273,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,928.56,894.44,0.0 +gfx950,256,3625,8192,512,ck,0,0,44.0284,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.66,1486.37,0.0 +gfx950,256,3663,8192,512,ck,0,0,44.0322,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.84,1500.82,0.0 +gfx950,256,2129,6144,1536,ck,0,0,44.0351,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,912.53,882.67,0.0 +gfx950,256,216,9216,7168,ck,12,0,44.0439,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,647.95,1625.42,0.0 +gfx950,256,3634,8192,512,ck,0,0,44.0474,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.08,1489.18,0.0 +gfx950,256,2162,6144,1536,ck,0,0,44.0517,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,926.33,892.69,0.0 +gfx950,256,3695,8192,512,ck,0,0,44.0518,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.62,1512.42,0.0 +gfx950,256,1553,8192,1536,ck,0,0,44.0578,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,887.07,917.26,0.0 +gfx950,256,2150,6144,1536,ck,0,0,44.0728,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,920.75,888.5,0.0 +gfx950,256,3689,8192,512,ck,0,0,44.0774,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.07,1509.25,0.0 +gfx950,256,2091,6144,1536,ck,0,0,44.0875,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,895.18,869.71,0.0 +gfx950,256,2130,6144,1536,ck,0,0,44.1006,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,911.61,881.67,0.0 +gfx950,256,3697,8192,512,ck,0,0,44.1011,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.22,1511.5,0.0 +gfx950,256,3664,8192,512,ck,0,0,44.1219,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.61,1498.15,0.0 +gfx950,256,3694,8192,512,ck,0,0,44.122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.31,1509.64,0.0 +gfx950,256,3617,8192,512,ck,0,0,44.1243,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.64,1480.07,0.0 +gfx950,256,1623,8192,1536,ck,0,0,44.136,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,925.42,944.06,0.0 +gfx950,256,2056,6144,1536,ck,0,0,44.1376,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,879.2,857.76,0.0 +gfx950,256,3674,8192,512,ck,0,0,44.1445,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.16,1501.21,0.0 +gfx950,256,3648,8192,512,ck,0,0,44.1504,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.12,1491.06,0.0 +gfx950,256,1639,8192,1536,ck,0,0,44.1524,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,934.19,950.2,0.0 +gfx950,256,3692,8192,512,ck,0,0,44.1614,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.31,1507.52,0.0 +gfx950,256,2160,6144,1536,ck,0,0,44.1873,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,922.63,889.33,0.0 +gfx950,256,3620,8192,512,ck,0,0,44.208,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.91,1478.42,0.0 +gfx950,256,2148,6144,1536,ck,0,0,44.2223,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,916.78,884.87,0.0 +gfx950,256,1601,8192,1536,ck,0,0,44.2551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,910.41,932.61,0.0 +gfx950,256,3585,8192,512,ck,18,0,44.2738,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,679.25,1462.86,0.0 +gfx950,256,2163,6144,1536,ck,0,0,44.2834,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,921.91,888.34,0.0 +gfx950,256,2218,6144,1536,ck,0,0,44.3388,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,944.17,904.37,0.0 +gfx950,256,3703,8192,512,ck,0,0,44.4398,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.99,1502.26,0.0 +gfx950,256,2116,6144,1536,ck,0,0,44.4428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,898.64,870.53,0.0 +gfx950,256,2189,6144,1536,ck,0,0,44.4618,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,929.25,892.85,0.0 +gfx950,256,2235,6144,1536,ck,0,0,44.5269,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,947.39,905.83,0.0 +gfx950,256,208,9216,7168,ck,12,0,44.5443,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,616.94,1602.56,0.0 +gfx950,256,2250,6144,1536,ck,0,0,44.5764,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,952.69,909.48,0.0 +gfx950,256,2304,6144,1536,ck,0,0,44.5987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,975.06,925.76,0.0 +gfx950,256,2185,6144,1536,ck,0,0,44.5998,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,924.68,888.85,0.0 +gfx950,256,1590,8192,1536,ck,0,0,44.6015,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,897.14,920.95,0.0 +gfx950,256,2216,6144,1536,ck,0,0,44.609,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,937.6,898.28,0.0 +gfx950,256,2226,6144,1536,ck,0,0,44.616,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,941.69,901.23,0.0 +gfx950,256,2109,6144,1536,ck,0,0,44.6314,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,891.88,864.68,0.0 +gfx950,256,2248,6144,1536,ck,0,0,44.6337,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,950.62,907.69,0.0 +gfx950,256,1685,8192,1536,ck,0,0,44.664,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,949.41,957.78,0.0 +gfx950,256,2186,6144,1536,ck,0,0,44.6992,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,923.04,887.18,0.0 +gfx950,256,2207,6144,1536,ck,0,0,44.7894,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,930.04,891.88,0.0 +gfx950,256,2187,6144,1536,ck,0,0,44.8148,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,921.09,885.2,0.0 +gfx950,256,2249,6144,1536,ck,0,0,44.8436,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,946.59,903.75,0.0 +gfx950,256,2367,6144,1536,ck,0,0,44.8574,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,995.95,939.84,0.0 +gfx950,256,3840,8192,512,ck,0,0,44.8747,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,717.83,1539.28,0.0 +gfx950,256,2301,6144,1536,ck,0,0,44.931,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,966.59,917.99,0.0 +gfx950,256,2219,6144,1536,ck,0,0,44.9322,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,932.12,892.74,0.0 +gfx950,256,2254,6144,1536,ck,0,0,44.9566,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,946.31,903.01,0.0 +gfx950,256,2282,6144,1536,ck,0,0,44.957,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,958.06,911.62,0.0 +gfx950,256,2369,6144,1536,ck,0,0,44.977,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,994.14,937.95,0.0 +gfx950,256,2357,6144,1536,ck,0,0,45.0022,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,988.55,933.74,0.0 +gfx950,256,3727,8192,512,ck,18,0,45.0078,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,694.64,1492.31,0.0 +gfx950,256,1710,8192,1536,ck,0,0,45.047,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,955.3,959.58,0.0 +gfx950,256,3792,8192,512,ck,0,0,45.0921,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.44,1513.88,0.0 +gfx950,256,3804,8192,512,ck,0,0,45.1046,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.47,1517.95,0.0 +gfx950,256,2251,6144,1536,ck,0,0,45.109,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,941.86,899.04,0.0 +gfx950,256,2268,6144,1536,ck,0,0,45.1213,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,948.71,904.01,0.0 +gfx950,256,2247,6144,1536,ck,0,0,45.1246,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,939.86,897.51,0.0 +gfx950,256,3757,8192,512,ck,0,0,45.138,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.21,1499.24,0.0 +gfx950,256,3820,8192,512,ck,0,0,45.139,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.91,1522.79,0.0 +gfx950,256,3771,8192,512,ck,18,0,45.139,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,700.8,1504.44,0.0 +gfx950,256,3733,8192,512,ck,0,0,45.1974,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.84,1488.3,0.0 +gfx950,256,3809,8192,512,ck,0,0,45.2015,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.88,1516.57,0.0 +gfx950,256,2188,6144,1536,ck,0,0,45.2066,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,913.52,877.84,0.0 +gfx950,256,2217,6144,1536,ck,0,0,45.2322,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,925.1,886.2,0.0 +gfx950,256,3743,8192,512,ck,0,0,45.251,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.88,1490.27,0.0 +gfx950,256,3817,8192,512,ck,0,0,45.27,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.3,1517.26,0.0 +gfx950,256,3755,8192,512,ck,0,0,45.2794,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.66,1493.81,0.0 +gfx950,256,2252,6144,1536,ck,0,0,45.2798,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,938.72,895.96,0.0 +gfx950,256,3760,8192,512,ck,0,0,45.2975,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.31,1495.08,0.0 +gfx950,256,3784,8192,512,ck,0,0,45.3011,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.7,1503.91,0.0 +gfx950,256,3777,8192,512,ck,0,0,45.3016,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.4,1501.28,0.0 +gfx950,256,3802,8192,512,ck,0,0,45.3025,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.01,1510.58,0.0 +gfx950,256,3720,8192,512,ck,0,0,45.303,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.82,1479.98,0.0 +gfx950,256,2365,6144,1536,ck,0,0,45.3055,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,985.26,929.93,0.0 +gfx950,256,3783,8192,512,ck,0,0,45.3071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.42,1503.34,0.0 +gfx950,256,3765,8192,512,ck,18,0,45.3166,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,696.94,1496.31,0.0 +gfx950,256,2389,6144,1536,ck,0,0,45.3238,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,994.86,936.87,0.0 +gfx950,256,3769,8192,512,ck,0,0,45.3324,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.44,1497.28,0.0 +gfx950,256,3791,8192,512,ck,0,0,45.3345,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.48,1505.41,0.0 +gfx950,256,3762,8192,512,ck,0,0,45.3352,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.1,1494.58,0.0 +gfx950,256,3793,8192,512,ck,0,0,45.3435,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.71,1505.86,0.0 +gfx950,256,3811,8192,512,ck,0,0,45.3458,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.0,1512.49,0.0 +gfx950,256,3758,8192,512,ck,0,0,45.3692,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.84,1491.97,0.0 +gfx950,256,3761,8192,512,ck,18,0,45.3831,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,695.18,1492.63,0.0 +gfx950,256,1718,8192,1536,ck,0,0,45.3858,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,952.61,955.57,0.0 +gfx950,256,2414,6144,1536,ck,0,0,45.3902,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1003.8,943.12,0.0 +gfx950,256,3834,8192,512,ck,0,0,45.4057,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,708.32,1519.05,0.0 +gfx950,256,3734,8192,512,ck,18,0,45.4118,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,689.76,1481.64,0.0 +gfx950,256,3779,8192,512,ck,0,0,45.4133,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.05,1498.33,0.0 +gfx950,256,3822,8192,512,ck,0,0,45.4187,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.9,1514.15,0.0 +gfx950,256,3759,8192,512,ck,0,0,45.4206,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.24,1490.65,0.0 +gfx950,256,3716,8192,512,ck,0,0,45.4233,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.26,1474.57,0.0 +gfx950,256,3718,8192,512,ck,0,0,45.4297,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.53,1475.11,0.0 +gfx950,256,3750,8192,512,ck,0,0,45.4331,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.39,1486.9,0.0 +gfx950,256,3788,8192,512,ck,0,0,45.4547,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.07,1500.31,0.0 +gfx950,256,3781,8192,512,ck,0,0,45.4587,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.72,1497.58,0.0 +gfx950,256,3782,8192,512,ck,0,0,45.4595,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.89,1497.93,0.0 +gfx950,256,3719,8192,512,ck,0,0,45.4607,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.25,1474.47,0.0 +gfx950,256,2215,6144,1536,ck,0,0,45.4774,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,919.29,880.82,0.0 +gfx950,256,3813,8192,512,ck,0,0,45.4878,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.17,1508.51,0.0 +gfx950,256,3790,8192,512,ck,0,0,45.4974,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.78,1499.65,0.0 +gfx950,256,3831,8192,512,ck,0,0,45.5059,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,706.21,1514.59,0.0 +gfx950,256,2358,6144,1536,ck,0,0,45.5074,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,977.99,923.68,0.0 +gfx950,256,2390,6144,1536,ck,0,0,45.5166,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,991.06,933.21,0.0 +gfx950,256,3821,8192,512,ck,0,0,45.5645,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.46,1508.94,0.0 +gfx950,256,2412,6144,1536,ck,0,0,45.5745,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,998.91,938.7,0.0 +gfx950,256,3786,8192,512,ck,0,0,45.6069,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.37,1494.57,0.0 +gfx950,256,3795,8192,512,ck,0,0,45.6118,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.95,1497.74,0.0 +gfx950,256,3818,8192,512,ck,0,0,45.6264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.96,1505.78,0.0 +gfx950,256,3807,8192,512,ck,0,0,45.6774,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.15,1500.03,0.0 +gfx950,256,3829,8192,512,ck,0,0,45.6787,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.17,1508.12,0.0 +gfx950,256,3838,8192,512,ck,0,0,45.6851,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.73,1511.24,0.0 +gfx950,256,2387,6144,1536,ck,0,0,45.7162,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,985.5,928.23,0.0 +gfx950,256,3825,8192,512,ck,0,0,45.7833,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.83,1503.2,0.0 +gfx950,256,3766,8192,512,ck,0,0,45.7982,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.8,1480.95,0.0 +gfx950,256,3714,8192,512,ck,0,0,45.869,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,679.22,1459.51,0.0 +gfx950,256,2356,6144,1536,ck,0,0,45.8746,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,969.34,915.68,0.0 +gfx950,256,2512,6144,1536,ck,0,0,45.8967,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1033.02,962.23,0.0 +gfx950,256,2461,6144,1536,ck,0,0,45.9125,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1011.7,946.54,0.0 +gfx950,256,3836,8192,512,ck,0,0,45.9873,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.73,1500.57,0.0 +gfx950,256,2344,6144,1536,ck,0,0,46.0091,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,961.58,909.4,0.0 +gfx950,256,3874,8192,512,ck,18,0,46.0378,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,705.89,1512.87,0.0 +gfx950,256,2378,6144,1536,ck,0,0,46.0602,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,974.45,918.59,0.0 +gfx950,256,3785,8192,512,ck,18,0,46.0651,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,689.26,1479.33,0.0 +gfx950,256,2368,6144,1536,ck,0,0,46.0822,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,969.89,915.16,0.0 +gfx950,256,2546,6144,1536,ck,0,0,46.1635,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1040.96,966.85,0.0 +gfx950,256,2355,6144,1536,ck,0,0,46.1927,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,962.25,909.08,0.0 +gfx950,256,2415,6144,1536,ck,0,0,46.2174,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,986.24,926.54,0.0 +gfx950,256,2548,6144,1536,ck,0,0,46.3038,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1038.62,964.52,0.0 +gfx950,256,2443,6144,1536,ck,0,0,46.3862,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,994.05,931.51,0.0 +gfx950,256,2486,6144,1536,ck,0,0,46.421,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1010.79,943.62,0.0 +gfx950,256,2483,6144,1536,ck,0,0,46.4767,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1008.36,941.59,0.0 +gfx950,256,2459,6144,1536,ck,0,0,46.5458,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,997.13,933.07,0.0 +gfx950,256,1816,8192,1536,ck,0,0,46.5526,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,981.71,969.35,0.0 +gfx950,256,1817,8192,1536,ck,0,0,46.5693,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,981.9,969.38,0.0 +gfx950,256,3968,8192,512,ck,0,0,46.6043,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.23,1528.56,0.0 +gfx950,256,3863,8192,512,ck,0,0,46.6201,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.09,1489.99,0.0 +gfx950,256,2403,6144,1536,ck,0,0,46.6279,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,972.7,914.82,0.0 +gfx950,256,3878,8192,512,ck,0,0,46.6459,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.4,1494.6,0.0 +gfx950,256,2534,6144,1536,ck,0,0,46.6982,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1024.19,952.23,0.0 +gfx950,256,3882,8192,512,ck,0,0,46.7069,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.21,1494.1,0.0 +gfx950,256,3890,8192,512,ck,18,0,46.725,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,698.38,1496.41,0.0 +gfx950,256,3870,8192,512,ck,0,0,46.7287,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.73,1489.06,0.0 +gfx950,256,2515,6144,1536,ck,0,0,46.7415,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1015.57,945.72,0.0 +gfx950,256,2440,6144,1536,ck,0,0,46.7531,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,985.04,923.31,0.0 +gfx950,256,3843,8192,512,ck,0,0,46.7555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.49,1478.45,0.0 +gfx950,256,1808,8192,1536,ck,0,0,46.7608,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,973.03,961.97,0.0 +gfx950,256,2464,6144,1536,ck,0,0,46.7906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,993.93,929.66,0.0 +gfx950,256,3877,8192,512,ck,0,0,46.8175,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,694.67,1488.76,0.0 +gfx950,256,3904,8192,512,ck,0,0,46.8242,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.41,1498.29,0.0 +gfx950,256,2532,6144,1536,ck,0,0,46.8302,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1020.49,948.95,0.0 +gfx950,256,3887,8192,512,ck,0,0,46.8362,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.18,1491.77,0.0 +gfx950,256,3936,8192,512,ck,0,0,46.8614,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,704.58,1508.64,0.0 +gfx950,256,3935,8192,512,ck,0,0,46.9041,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.76,1506.91,0.0 +gfx950,256,3919,8192,512,ck,0,0,46.9126,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.77,1500.87,0.0 +gfx950,256,2445,6144,1536,ck,0,0,46.9167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,983.61,921.57,0.0 +gfx950,256,3901,8192,512,ck,0,0,46.9226,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.4,1494.07,0.0 +gfx950,256,3926,8192,512,ck,0,0,46.9314,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.74,1502.79,0.0 +gfx950,256,3855,8192,512,ck,18,0,46.9478,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,688.81,1476.71,0.0 +gfx950,256,3861,8192,512,ck,0,0,46.9678,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.59,1478.24,0.0 +gfx950,256,1809,8192,1536,ck,0,0,46.9684,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,969.27,958.1,0.0 +gfx950,256,2444,6144,1536,ck,0,0,46.9746,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,982.0,920.14,0.0 +gfx950,256,3841,8192,512,ck,18,0,46.9914,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,685.67,1470.31,0.0 +gfx950,256,3860,8192,512,ck,0,0,46.9926,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.05,1477.1,0.0 +gfx950,256,3888,8192,512,ck,0,0,47.0055,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.85,1486.76,0.0 +gfx950,256,3921,8192,512,ck,0,0,47.0071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.72,1498.57,0.0 +gfx950,256,3909,8192,512,ck,18,0,47.0271,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,697.28,1493.62,0.0 +gfx950,256,3963,8192,512,ck,18,0,47.0322,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,706.84,1512.86,0.0 +gfx950,256,3858,8192,512,ck,0,0,47.0398,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.0,1474.9,0.0 +gfx950,256,3914,8192,512,ck,0,0,47.0399,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.98,1495.01,0.0 +gfx950,256,3879,8192,512,ck,0,0,47.0573,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.48,1481.89,0.0 +gfx950,256,3906,8192,512,ck,0,0,47.0602,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.26,1491.5,0.0 +gfx950,256,3947,8192,512,ck,0,0,47.0697,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,703.42,1505.91,0.0 +gfx950,256,3934,8192,512,ck,18,0,47.0758,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,701.01,1501.05,0.0 +gfx950,256,2441,6144,1536,ck,0,0,47.0762,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,978.68,917.27,0.0 +gfx950,256,3871,8192,512,ck,0,0,47.0799,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.73,1478.31,0.0 +gfx950,256,3876,8192,512,ck,0,0,47.0846,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.55,1479.96,0.0 +gfx950,256,3848,8192,512,ck,0,0,47.0956,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.4,1469.57,0.0 +gfx950,256,2529,6144,1536,ck,0,0,47.1031,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1013.38,942.57,0.0 +gfx950,256,3964,8192,512,ck,0,0,47.1187,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.72,1510.44,0.0 +gfx950,256,2479,6144,1536,ck,0,0,47.1198,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,992.99,927.57,0.0 +gfx950,256,3842,8192,512,ck,0,0,47.1282,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.86,1466.4,0.0 +gfx950,256,3899,8192,512,ck,0,0,47.131,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.96,1486.75,0.0 +gfx950,256,3950,8192,512,ck,0,0,47.1666,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.51,1503.89,0.0 +gfx950,256,2595,6144,1536,ck,0,0,47.1707,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1038.33,960.56,0.0 +gfx950,256,3944,8192,512,ck,0,0,47.1785,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.27,1501.36,0.0 +gfx950,256,3866,8192,512,ck,18,0,47.183,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,687.33,1473.29,0.0 +gfx950,256,3849,8192,512,ck,0,0,47.1865,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.26,1467.09,0.0 +gfx950,256,2596,6144,1536,ck,0,0,47.1934,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1038.24,960.39,0.0 +gfx950,256,3912,8192,512,ck,0,0,47.2011,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.24,1489.19,0.0 +gfx950,256,3923,8192,512,ck,0,0,47.2063,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.12,1492.96,0.0 +gfx950,256,3925,8192,512,ck,0,0,47.2096,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.43,1493.58,0.0 +gfx950,256,3928,8192,512,ck,0,0,47.2158,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.87,1494.45,0.0 +gfx950,256,2568,6144,1536,ck,0,0,47.259,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1025.61,950.87,0.0 +gfx950,256,3955,8192,512,ck,0,0,47.2679,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,701.89,1502.46,0.0 +gfx950,256,3929,8192,512,ck,0,0,47.2704,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.24,1493.08,0.0 +gfx950,256,3930,8192,512,ck,0,0,47.2722,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.39,1493.38,0.0 +gfx950,256,3922,8192,512,ck,0,0,47.2746,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.94,1490.45,0.0 +gfx950,256,3924,8192,512,ck,0,0,47.2811,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.2,1490.96,0.0 +gfx950,256,3958,8192,512,ck,0,0,47.2842,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.18,1503.01,0.0 +gfx950,256,3942,8192,512,ck,0,0,47.2922,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.23,1497.04,0.0 +gfx950,256,2544,6144,1536,ck,0,0,47.3235,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1014.64,942.56,0.0 +gfx950,256,2594,6144,1536,ck,0,0,47.3287,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1034.47,957.06,0.0 +gfx950,256,2593,6144,1536,ck,0,0,47.3371,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1033.89,956.6,0.0 +gfx950,256,2597,6144,1536,ck,0,0,47.3387,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1035.45,957.74,0.0 +gfx950,256,3916,8192,512,ck,0,0,47.3555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.68,1485.76,0.0 +gfx950,256,3967,8192,512,ck,0,0,47.3749,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.43,1503.34,0.0 +gfx950,256,3913,8192,512,ck,0,0,47.437,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.96,1482.14,0.0 +gfx950,256,2567,6144,1536,ck,0,0,47.4522,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1021.04,946.71,0.0 +gfx950,256,3961,8192,512,ck,0,0,47.5007,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.51,1497.23,0.0 +gfx950,256,3946,8192,512,ck,0,0,47.5312,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.42,1490.93,0.0 +gfx950,256,3885,8192,512,ck,0,0,47.5829,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.9,1467.65,0.0 +gfx950,256,1821,8192,1536,ck,0,0,47.6472,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,961.8,948.96,0.0 +gfx950,256,2570,6144,1536,ck,0,0,47.7894,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1015.02,940.9,0.0 +gfx950,256,1980,8192,1536,ck,0,0,47.8367,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1041.63,1004.76,0.0 +gfx950,256,2636,6144,1536,ck,0,0,47.9002,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1038.68,957.77,0.0 +gfx950,256,2658,6144,1536,ck,0,0,47.9672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1045.88,962.77,0.0 +gfx950,256,2569,6144,1536,ck,0,0,48.0734,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1008.63,935.05,0.0 +gfx950,256,1968,8192,1536,ck,0,0,48.0832,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1030.01,995.14,0.0 +gfx950,256,1934,8192,1536,ck,0,0,48.2551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1008.61,978.97,0.0 +gfx950,256,3969,8192,512,ck,18,0,48.3055,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,689.25,1475.08,0.0 +gfx950,256,3976,8192,512,ck,18,0,48.3507,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,689.82,1476.15,0.0 +gfx950,256,1991,8192,1536,ck,0,0,48.3532,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1036.23,998.11,0.0 +gfx950,256,3980,8192,512,ck,18,0,48.3738,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,690.18,1476.84,0.0 +gfx950,256,4021,8192,512,ck,18,0,48.4482,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,696.22,1488.87,0.0 +gfx950,256,232,9216,7168,ck,18,0,48.461,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,632.51,1485.72,0.0 +gfx950,256,3975,8192,512,ck,0,0,48.4992,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.53,1471.28,0.0 +gfx950,256,3972,8192,512,ck,0,0,48.5005,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.99,1470.2,0.0 +gfx950,256,3981,8192,512,ck,0,0,48.5447,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.92,1471.99,0.0 +gfx950,256,2032,8192,1536,ck,0,0,48.548,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1053.33,1009.24,0.0 +gfx950,256,3970,8192,512,ck,0,0,48.5655,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.73,1467.53,0.0 +gfx950,256,2037,8192,1536,ck,0,0,48.5683,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1055.48,1010.66,0.0 +gfx950,256,2012,8192,1536,ck,0,0,48.5696,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1042.5,1001.41,0.0 +gfx950,256,2003,8192,1536,ck,0,0,48.5878,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1037.44,997.71,0.0 +gfx950,256,3979,8192,512,ck,0,0,48.5905,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.93,1469.91,0.0 +gfx950,256,1944,8192,1536,ck,0,0,48.5964,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1006.71,975.78,0.0 +gfx950,256,4006,8192,512,ck,18,0,48.5987,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,691.47,1479.05,0.0 +gfx950,256,3973,8192,512,ck,0,0,48.6226,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.44,1466.85,0.0 +gfx950,256,3985,8192,512,ck,0,0,48.6293,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.42,1470.82,0.0 +gfx950,256,2000,8192,1536,ck,0,0,48.6855,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1033.81,994.61,0.0 +gfx950,256,4000,8192,512,ck,0,0,48.6866,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.19,1474.29,0.0 +gfx950,256,4005,8192,512,ck,0,0,48.7212,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.56,1474.98,0.0 +gfx950,256,3978,8192,512,ck,18,0,48.7359,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,684.71,1465.17,0.0 +gfx950,256,2009,8192,1536,ck,0,0,48.7383,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1037.34,996.84,0.0 +gfx950,256,3974,8192,512,ck,0,0,48.7664,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,683.59,1462.87,0.0 +gfx950,256,4002,8192,512,ck,18,0,48.7731,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,688.31,1472.37,0.0 +gfx950,256,3994,8192,512,ck,0,0,48.7748,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.91,1469.55,0.0 +gfx950,256,3988,8192,512,ck,0,0,48.7913,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.65,1466.97,0.0 +gfx950,256,4004,8192,512,ck,18,0,48.7971,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,688.32,1472.34,0.0 +gfx950,256,4009,8192,512,ck,0,0,48.8195,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.86,1473.39,0.0 +gfx950,256,2002,8192,1536,ck,0,0,48.822,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1031.95,992.56,0.0 +gfx950,256,4023,8192,512,ck,18,0,48.8562,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,690.75,1477.13,0.0 +gfx950,256,2041,8192,1536,ck,0,0,48.8672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1051.08,1005.94,0.0 +gfx950,256,3997,8192,512,ck,18,0,48.9399,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,685.11,1465.63,0.0 +gfx950,256,1987,8192,1536,ck,0,0,48.962,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1021.29,984.23,0.0 +gfx950,256,4036,8192,512,ck,0,0,48.9725,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.34,1478.11,0.0 +gfx950,256,4012,8192,512,ck,0,0,48.9764,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.17,1469.71,0.0 +gfx950,256,2011,8192,1536,ck,0,0,48.9768,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1033.32,992.72,0.0 +gfx950,256,4016,8192,512,ck,0,0,48.9789,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.82,1471.01,0.0 +gfx950,256,1978,8192,1536,ck,0,0,48.9836,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1016.22,980.51,0.0 +gfx950,256,4032,8192,512,ck,0,0,49.0015,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.24,1475.85,0.0 +gfx950,256,2035,8192,1536,ck,0,0,49.002,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1045.11,1000.98,0.0 +gfx950,256,4010,8192,512,ck,18,0,49.0155,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,686.28,1467.85,0.0 +gfx950,256,4007,8192,512,ck,0,0,49.0492,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.29,1465.81,0.0 +gfx950,256,4414,8192,512,ck,0,0,49.0816,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.4,1604.94,0.0 +gfx950,256,4017,8192,512,ck,0,0,49.0959,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.35,1467.85,0.0 +gfx950,256,4368,8192,512,ck,0,0,49.1293,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.82,1587.57,0.0 +gfx950,256,2004,8192,1536,ck,0,0,49.1359,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1026.38,986.95,0.0 +gfx950,256,4226,8192,512,ck,0,0,49.1461,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,721.32,1538.21,0.0 +gfx950,256,4037,8192,512,ck,0,0,49.1468,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.05,1473.21,0.0 +gfx950,256,2006,8192,1536,ck,0,0,49.1511,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1027.09,987.37,0.0 +gfx950,256,4043,8192,512,ck,0,0,49.1781,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.64,1474.33,0.0 +gfx950,256,4040,8192,512,ck,0,0,49.1906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.95,1472.93,0.0 +gfx950,256,4018,8192,512,ck,0,0,49.1974,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.11,1465.17,0.0 +gfx950,256,4044,8192,512,ck,0,0,49.2611,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.65,1472.19,0.0 +gfx950,256,4055,8192,512,ck,0,0,49.2697,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,690.4,1475.71,0.0 +gfx950,256,4411,8192,512,ck,0,0,49.2919,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.67,1597.07,0.0 +gfx950,256,4099,8192,512,ck,18,0,49.3115,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,697.3,1489.53,0.0 +gfx950,256,4114,8192,512,ck,0,0,49.3275,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.62,1494.19,0.0 +gfx950,256,4372,8192,512,ck,0,0,49.3499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.16,1581.84,0.0 +gfx950,256,4502,8192,512,ck,0,0,49.3661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.01,1625.81,0.0 +gfx950,256,4048,8192,512,ck,0,0,49.3682,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.83,1470.37,0.0 +gfx950,256,4401,8192,512,ck,0,0,49.3894,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.49,1590.5,0.0 +gfx950,256,4046,8192,512,ck,0,0,49.3919,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.16,1468.98,0.0 +gfx950,256,4104,8192,512,ck,0,0,49.4003,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,696.9,1488.56,0.0 +gfx950,256,4059,8192,512,ck,0,0,49.4021,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.23,1473.12,0.0 +gfx950,256,4049,8192,512,ck,0,0,49.4163,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.33,1469.28,0.0 +gfx950,256,4569,8192,512,ck,0,0,49.4189,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.56,1646.98,0.0 +gfx950,256,4447,8192,512,ck,0,0,49.4255,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.75,1605.06,0.0 +gfx950,256,4113,8192,512,ck,18,0,49.4259,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,698.06,1490.87,0.0 +gfx950,256,4499,8192,512,ck,0,0,49.4371,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.4,1622.45,0.0 +gfx950,256,4060,8192,512,ck,18,0,49.4495,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,688.74,1472.05,0.0 +gfx950,256,4245,8192,512,ck,0,0,49.4673,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.86,1534.71,0.0 +gfx950,256,4105,8192,512,ck,0,0,49.4826,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,695.91,1486.43,0.0 +gfx950,256,4053,8192,512,ck,0,0,49.4844,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.07,1468.62,0.0 +gfx950,256,4051,8192,512,ck,0,0,49.4848,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.72,1467.93,0.0 +gfx950,256,4454,8192,512,ck,0,0,49.4887,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.98,1605.4,0.0 +gfx950,256,4069,8192,512,ck,0,0,49.4942,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.64,1473.79,0.0 +gfx950,256,4608,8192,512,ck,0,0,49.5111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.73,1657.23,0.0 +gfx950,256,4062,8192,512,ck,0,0,49.5219,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.07,1470.58,0.0 +gfx950,256,4425,8192,512,ck,0,0,49.5399,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.29,1593.85,0.0 +gfx950,256,4052,8192,512,ck,0,0,49.5425,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.09,1466.56,0.0 +gfx950,256,4239,8192,512,ck,0,0,49.5462,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,717.7,1530.22,0.0 +gfx950,256,4360,8192,512,ck,0,0,49.5612,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.96,1571.0,0.0 +gfx950,256,4444,8192,512,ck,0,0,49.5632,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.15,1599.58,0.0 +gfx950,256,4070,8192,512,ck,0,0,49.5665,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.8,1471.98,0.0 +gfx950,256,4095,8192,512,ck,18,0,49.5715,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,692.97,1480.36,0.0 +gfx950,256,4309,8192,512,ck,0,0,49.59,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.91,1552.72,0.0 +gfx950,256,4101,8192,512,ck,0,0,49.5959,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.64,1481.67,0.0 +gfx950,256,240,9216,7168,ck,18,0,49.6028,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,639.26,1455.65,0.0 +gfx950,256,4279,8192,512,ck,0,0,49.6038,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.63,1542.07,0.0 +gfx950,256,4057,8192,512,ck,0,0,49.605,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,686.07,1466.41,0.0 +gfx950,256,4130,8192,512,ck,18,0,49.6263,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,698.12,1490.64,0.0 +gfx950,256,4124,8192,512,ck,18,0,49.6427,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,696.87,1488.1,0.0 +gfx950,256,4453,8192,512,ck,0,0,49.6448,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.43,1600.01,0.0 +gfx950,256,4408,8192,512,ck,0,0,49.6548,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.68,1584.38,0.0 +gfx950,256,4596,8192,512,ck,0,0,49.6555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.43,1648.32,0.0 +gfx950,256,4376,8192,512,ck,0,0,49.6616,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.17,1573.27,0.0 +gfx950,256,4395,8192,512,ck,0,0,49.6687,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,742.28,1579.51,0.0 +gfx950,256,4324,8192,512,ck,0,0,49.6808,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.11,1554.98,0.0 +gfx950,256,4450,8192,512,ck,0,0,49.6902,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.24,1597.53,0.0 +gfx950,256,4355,8192,512,ck,0,0,49.6927,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.17,1565.15,0.0 +gfx950,256,4570,8192,512,ck,0,0,49.7158,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.1,1637.49,0.0 +gfx950,256,4074,8192,512,ck,0,0,49.7191,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.37,1468.82,0.0 +gfx950,256,4451,8192,512,ck,0,0,49.7219,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.93,1596.85,0.0 +gfx950,256,4465,8192,512,ck,0,0,49.7228,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.28,1601.58,0.0 +gfx950,256,4067,8192,512,ck,0,0,49.7456,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.82,1465.66,0.0 +gfx950,256,4402,8192,512,ck,0,0,49.7472,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,742.29,1579.4,0.0 +gfx950,256,4432,8192,512,ck,0,0,49.7482,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.33,1589.55,0.0 +gfx950,256,4252,8192,512,ck,0,0,49.7523,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,716.92,1528.29,0.0 +gfx950,256,248,9216,7168,ck,18,0,49.7701,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,658.35,1454.87,0.0 +gfx950,256,4494,8192,512,ck,0,0,49.7743,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.39,1609.77,0.0 +gfx950,256,4420,8192,512,ck,0,0,49.7746,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.91,1584.64,0.0 +gfx950,256,4128,8192,512,ck,18,0,49.7815,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,695.6,1485.31,0.0 +gfx950,256,4242,8192,512,ck,0,0,49.7936,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.64,1523.63,0.0 +gfx950,256,4366,8192,512,ck,0,0,49.8125,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.25,1565.11,0.0 +gfx950,256,4571,8192,512,ck,0,0,49.8159,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.72,1634.54,0.0 +gfx950,256,4330,8192,512,ck,0,0,49.8177,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.11,1552.74,0.0 +gfx950,256,4508,8192,512,ck,0,0,49.8291,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.91,1612.74,0.0 +gfx950,256,4356,8192,512,ck,0,0,49.8294,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,733.32,1561.19,0.0 +gfx950,256,4398,8192,512,ck,0,0,49.8342,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.32,1575.28,0.0 +gfx950,256,4577,8192,512,ck,0,0,49.8419,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.33,1635.72,0.0 +gfx950,256,4271,8192,512,ck,0,0,49.8428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,718.81,1531.96,0.0 +gfx950,256,4467,8192,512,ck,0,0,49.8428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.8,1598.4,0.0 +gfx950,256,4142,8192,512,ck,0,0,49.8469,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.05,1488.11,0.0 +gfx950,256,4225,8192,512,ck,0,0,49.8553,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.89,1515.99,0.0 +gfx950,256,4300,8192,512,ck,0,0,49.8619,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.42,1541.2,0.0 +gfx950,256,4523,8192,512,ck,0,0,49.8622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.93,1616.75,0.0 +gfx950,256,4073,8192,512,ck,0,0,49.864,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.2,1464.22,0.0 +gfx950,256,4578,8192,512,ck,0,0,49.8719,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.03,1635.07,0.0 +gfx950,256,4143,8192,512,ck,18,0,49.8731,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,696.85,1487.66,0.0 +gfx950,256,4455,8192,512,ck,0,0,49.8733,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.32,1593.36,0.0 +gfx950,256,4118,8192,512,ck,0,0,49.8739,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.63,1479.17,0.0 +gfx950,256,4423,8192,512,ck,0,0,49.8779,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.87,1582.37,0.0 +gfx950,256,4307,8192,512,ck,0,0,49.8783,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,724.36,1543.06,0.0 +gfx950,256,4378,8192,512,ck,0,0,49.8788,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.29,1567.1,0.0 +gfx950,256,4405,8192,512,ck,0,0,49.8791,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.83,1576.24,0.0 +gfx950,256,4361,8192,512,ck,0,0,49.8852,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,733.34,1561.14,0.0 +gfx950,256,4116,8192,512,ck,0,0,49.8872,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.11,1478.1,0.0 +gfx950,256,4427,8192,512,ck,0,0,49.8879,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.4,1583.41,0.0 +gfx950,256,4134,8192,512,ck,18,0,49.8879,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,695.13,1484.17,0.0 +gfx950,256,4359,8192,512,ck,0,0,49.8888,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,732.95,1560.35,0.0 +gfx950,256,4235,8192,512,ck,0,0,49.8968,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.98,1518.11,0.0 +gfx950,256,4117,8192,512,ck,18,0,49.9091,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,691.98,1477.79,0.0 +gfx950,256,4301,8192,512,ck,0,0,49.9121,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,722.86,1539.99,0.0 +gfx950,256,4315,8192,512,ck,0,0,49.9166,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.15,1544.59,0.0 +gfx950,256,4292,8192,512,ck,0,0,49.9199,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,721.23,1536.7,0.0 +gfx950,256,4318,8192,512,ck,0,0,49.9211,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.59,1545.46,0.0 +gfx950,256,4388,8192,512,ck,0,0,49.9277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.25,1568.95,0.0 +gfx950,256,4236,8192,512,ck,0,0,49.9327,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,711.64,1517.36,0.0 +gfx950,256,4407,8192,512,ck,0,0,49.934,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.35,1575.18,0.0 +gfx950,256,4280,8192,512,ck,0,0,49.944,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,718.87,1531.9,0.0 +gfx950,256,4323,8192,512,ck,0,0,49.9518,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.98,1546.2,0.0 +gfx950,256,4532,8192,512,ck,0,0,49.9659,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.86,1616.44,0.0 +gfx950,256,4168,8192,512,ck,18,0,49.9747,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,699.63,1493.09,0.0 +gfx950,256,4200,8192,512,ck,18,0,49.9767,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,704.97,1503.85,0.0 +gfx950,256,4516,8192,512,ck,0,0,49.9791,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.98,1610.61,0.0 +gfx950,256,4500,8192,512,ck,0,0,49.9801,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.28,1605.16,0.0 +gfx950,256,4394,8192,512,ck,0,0,49.9814,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.47,1569.29,0.0 +gfx950,256,4348,8192,512,ck,0,0,49.9829,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.72,1553.69,0.0 +gfx950,256,4261,8192,512,ck,0,0,49.983,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,715.12,1524.28,0.0 +gfx950,256,4538,8192,512,ck,0,0,49.9835,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.6,1617.9,0.0 +gfx950,256,4535,8192,512,ck,0,0,49.9838,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.09,1616.88,0.0 +gfx950,256,4131,8192,512,ck,0,0,49.9987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.08,1479.87,0.0 +gfx950,256,4163,8192,512,ck,18,0,50.0011,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,698.42,1490.61,0.0 +gfx950,256,4185,8192,512,ck,0,0,50.0012,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.11,1498.05,0.0 +gfx950,256,4223,8192,512,ck,18,0,50.0032,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,708.46,1510.83,0.0 +gfx950,256,4357,8192,512,ck,0,0,50.0066,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.89,1556.0,0.0 +gfx950,256,4566,8192,512,ck,0,0,50.008,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.93,1626.57,0.0 +gfx950,256,4275,8192,512,ck,0,0,50.0096,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,717.09,1528.2,0.0 +gfx950,256,4460,8192,512,ck,0,0,50.0152,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.04,1590.53,0.0 +gfx950,256,4510,8192,512,ck,0,0,50.0299,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.2,1606.94,0.0 +gfx950,256,4493,8192,512,ck,0,0,50.0375,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.24,1600.96,0.0 +gfx950,256,4436,8192,512,ck,0,0,50.0452,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.57,1581.47,0.0 +gfx950,256,4438,8192,512,ck,0,0,50.0466,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.88,1582.1,0.0 +gfx950,256,4552,8192,512,ck,0,0,50.0486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.96,1620.52,0.0 +gfx950,256,4551,8192,512,ck,0,0,50.0491,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.78,1620.17,0.0 +gfx950,256,4580,8192,512,ck,0,0,50.0503,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.62,1629.92,0.0 +gfx950,256,4320,8192,512,ck,0,0,50.0554,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.97,1541.99,0.0 +gfx950,256,4088,8192,512,ck,0,0,50.0563,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.08,1463.65,0.0 +gfx950,256,4310,8192,512,ck,0,0,50.06,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,722.23,1538.48,0.0 +gfx950,256,4321,8192,512,ck,0,0,50.0605,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,724.07,1542.17,0.0 +gfx950,256,4153,8192,512,ck,18,0,50.0639,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,695.87,1485.37,0.0 +gfx950,256,4573,8192,512,ck,0,0,50.0654,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.22,1627.07,0.0 +gfx950,256,4592,8192,512,ck,0,0,50.0731,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.29,1633.23,0.0 +gfx950,256,4563,8192,512,ck,0,0,50.0744,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.41,1623.4,0.0 +gfx950,256,4303,8192,512,ck,0,0,50.0791,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,720.78,1535.53,0.0 +gfx950,256,4273,8192,512,ck,0,0,50.0828,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,715.71,1525.29,0.0 +gfx950,256,4603,8192,512,ck,0,0,50.0855,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.94,1636.53,0.0 +gfx950,256,4363,8192,512,ck,0,0,50.0865,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.73,1555.54,0.0 +gfx950,256,4227,8192,512,ck,0,0,50.089,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.91,1509.59,0.0 +gfx950,256,4506,8192,512,ck,0,0,50.0903,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.62,1603.66,0.0 +gfx950,256,4572,8192,512,ck,0,0,50.0968,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.57,1625.71,0.0 +gfx950,256,4586,8192,512,ck,0,0,50.1017,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.84,1630.27,0.0 +gfx950,256,4145,8192,512,ck,0,0,50.122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.72,1480.95,0.0 +gfx950,256,4527,8192,512,ck,0,0,50.1231,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.64,1609.69,0.0 +gfx950,256,4246,8192,512,ck,0,0,50.1237,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.6,1514.95,0.0 +gfx950,256,4329,8192,512,ck,0,0,50.1295,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,724.41,1542.75,0.0 +gfx950,256,4240,8192,512,ck,0,0,50.1377,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,709.4,1512.5,0.0 +gfx950,256,4575,8192,512,ck,0,0,50.1468,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.31,1625.1,0.0 +gfx950,256,4317,8192,512,ck,0,0,50.1536,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,722.05,1537.96,0.0 +gfx950,256,4393,8192,512,ck,0,0,50.1548,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,734.75,1563.53,0.0 +gfx950,256,4178,8192,512,ck,18,0,50.1648,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,698.65,1490.8,0.0 +gfx950,256,4306,8192,512,ck,0,0,50.171,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.96,1533.72,0.0 +gfx950,256,4501,8192,512,ck,0,0,50.1723,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.55,1599.35,0.0 +gfx950,256,4120,8192,512,ck,0,0,50.1791,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.75,1470.85,0.0 +gfx950,256,4174,8192,512,ck,18,0,50.1812,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,697.75,1488.97,0.0 +gfx950,256,4269,8192,512,ck,0,0,50.1878,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.54,1520.75,0.0 +gfx950,256,4542,8192,512,ck,0,0,50.1933,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.09,1612.48,0.0 +gfx950,256,4495,8192,512,ck,0,0,50.1943,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.22,1596.63,0.0 +gfx950,256,4184,8192,512,ck,0,0,50.1975,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,699.2,1491.85,0.0 +gfx950,256,4251,8192,512,ck,0,0,50.2012,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,710.34,1514.29,0.0 +gfx950,256,4350,8192,512,ck,0,0,50.2021,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,726.87,1547.58,0.0 +gfx950,256,4325,8192,512,ck,0,0,50.2094,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,722.59,1538.94,0.0 +gfx950,256,4308,8192,512,ck,0,0,50.2099,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.74,1533.21,0.0 +gfx950,256,4531,8192,512,ck,0,0,50.2167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.9,1608.03,0.0 +gfx950,256,4492,8192,512,ck,0,0,50.2216,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.31,1594.75,0.0 +gfx950,256,4150,8192,512,ck,0,0,50.2226,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,693.17,1479.67,0.0 +gfx950,256,4274,8192,512,ck,0,0,50.2347,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.71,1521.02,0.0 +gfx950,256,4397,8192,512,ck,0,0,50.2436,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,734.12,1562.11,0.0 +gfx950,256,4238,8192,512,ck,0,0,50.2463,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,707.53,1508.56,0.0 +gfx950,256,4129,8192,512,ck,0,0,50.2515,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,689.26,1471.75,0.0 +gfx950,256,4346,8192,512,ck,0,0,50.264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.31,1544.33,0.0 +gfx950,256,4530,8192,512,ck,0,0,50.2655,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.99,1606.14,0.0 +gfx950,256,4354,8192,512,ck,0,0,50.2734,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,726.51,1546.73,0.0 +gfx950,256,4470,8192,512,ck,0,0,50.276,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.82,1585.64,0.0 +gfx950,256,4367,8192,512,ck,0,0,50.284,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.52,1550.77,0.0 +gfx950,256,4443,8192,512,ck,0,0,50.2885,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.14,1576.17,0.0 +gfx950,256,4441,8192,512,ck,0,0,50.2937,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.73,1575.34,0.0 +gfx950,256,4497,8192,512,ck,0,0,50.2961,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.03,1594.07,0.0 +gfx950,256,4322,8192,512,ck,0,0,50.2985,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,720.81,1535.21,0.0 +gfx950,256,4383,8192,512,ck,0,0,50.3034,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.91,1555.55,0.0 +gfx950,256,4606,8192,512,ck,0,0,50.3102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.99,1630.23,0.0 +gfx950,256,4541,8192,512,ck,0,0,50.3106,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.15,1608.39,0.0 +gfx950,256,4534,8192,512,ck,0,0,50.3111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.98,1606.02,0.0 +gfx950,256,4156,8192,512,ck,0,0,50.3179,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,692.86,1478.88,0.0 +gfx950,256,4182,8192,512,ck,18,0,50.3255,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,697.09,1487.38,0.0 +gfx950,256,4496,8192,512,ck,0,0,50.3319,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.33,1592.6,0.0 +gfx950,256,4136,8192,512,ck,18,0,50.3319,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,689.33,1471.75,0.0 +gfx950,256,4545,8192,512,ck,0,0,50.3439,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.32,1608.67,0.0 +gfx950,256,4546,8192,512,ck,0,0,50.3547,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.32,1608.66,0.0 +gfx950,256,4213,8192,512,ck,18,0,50.3627,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,701.73,1496.69,0.0 +gfx950,256,4282,8192,512,ck,0,0,50.3727,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,713.09,1519.53,0.0 +gfx950,256,4108,8192,512,ck,0,0,50.3751,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.08,1461.1,0.0 +gfx950,256,4277,8192,512,ck,0,0,50.382,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,712.12,1517.58,0.0 +gfx950,256,4464,8192,512,ck,0,0,50.3992,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.0,1579.75,0.0 +gfx950,256,4507,8192,512,ck,0,0,50.4035,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.1,1594.03,0.0 +gfx950,256,4112,8192,512,ck,0,0,50.404,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,684.35,1461.6,0.0 +gfx950,256,4347,8192,512,ck,0,0,50.4217,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.21,1539.84,0.0 +gfx950,256,4342,8192,512,ck,0,0,50.4396,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,722.12,1537.62,0.0 +gfx950,256,4159,8192,512,ck,18,0,50.4443,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,691.62,1476.18,0.0 +gfx950,256,4503,8192,512,ck,0,0,50.4724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.41,1590.51,0.0 +gfx950,256,4140,8192,512,ck,0,0,50.4803,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,687.97,1468.77,0.0 +gfx950,256,4349,8192,512,ck,0,0,50.4823,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,722.67,1538.66,0.0 +gfx950,256,4147,8192,512,ck,18,0,50.4903,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,688.99,1470.82,0.0 +gfx950,256,4568,8192,512,ck,0,0,50.4946,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.88,1611.56,0.0 +gfx950,256,4601,8192,512,ck,0,0,50.4995,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.28,1622.45,0.0 +gfx950,256,4212,8192,512,ck,18,0,50.5011,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,699.64,1492.25,0.0 +gfx950,256,4218,8192,512,ck,0,0,50.5171,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,700.42,1493.78,0.0 +gfx950,256,4509,8192,512,ck,0,0,50.5345,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.48,1590.56,0.0 +gfx950,256,4556,8192,512,ck,0,0,50.5537,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.0,1605.67,0.0 +gfx950,256,4561,8192,512,ck,0,0,50.5678,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.62,1606.89,0.0 +gfx950,256,4152,8192,512,ck,0,0,50.585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.53,1469.73,0.0 +gfx950,256,4519,8192,512,ck,0,0,50.5879,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.35,1592.23,0.0 +gfx950,256,4504,8192,512,ck,0,0,50.6387,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,746.11,1585.62,0.0 +gfx950,256,4524,8192,512,ck,0,0,50.667,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.01,1591.41,0.0 +gfx950,256,4498,8192,512,ck,0,0,50.6691,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.67,1582.67,0.0 +gfx950,256,4533,8192,512,ck,0,0,50.6879,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.19,1593.75,0.0 +gfx950,256,4512,8192,512,ck,0,0,50.6997,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,746.54,1586.38,0.0 +gfx950,256,4522,8192,512,ck,0,0,50.7083,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.07,1589.44,0.0 +gfx950,256,4217,8192,512,ck,0,0,50.7281,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,697.34,1487.24,0.0 +gfx950,256,4222,8192,512,ck,0,0,50.7326,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,698.11,1488.77,0.0 +gfx950,256,4461,8192,512,ck,0,0,50.7356,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.58,1568.27,0.0 +gfx950,256,4600,8192,512,ck,0,0,50.7371,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.54,1614.52,0.0 +gfx950,256,4468,8192,512,ck,0,0,50.7502,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,738.53,1570.15,0.0 +gfx950,256,4170,8192,512,ck,18,0,50.7564,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,689.18,1470.76,0.0 +gfx950,256,4176,8192,512,ck,18,0,50.8209,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,689.3,1470.89,0.0 +gfx950,256,4459,8192,512,ck,0,0,50.8287,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.9,1564.74,0.0 +gfx950,256,4598,8192,512,ck,0,0,50.8479,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.55,1610.33,0.0 +gfx950,256,4343,8192,512,ck,0,0,50.8739,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,716.12,1524.82,0.0 +gfx950,256,4202,8192,512,ck,18,0,50.878,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,692.81,1477.87,0.0 +gfx950,256,4564,8192,512,ck,0,0,50.9131,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.98,1596.99,0.0 +gfx950,256,4177,8192,512,ck,0,0,50.9131,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,688.22,1468.56,0.0 +gfx950,256,4265,8192,512,ck,0,0,50.9291,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,702.49,1497.29,0.0 +gfx950,256,4337,8192,512,ck,0,0,50.9509,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,714.05,1520.53,0.0 +gfx950,256,4205,8192,512,ck,0,0,50.9943,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,691.73,1475.5,0.0 +gfx950,256,4167,8192,512,ck,0,0,51.0189,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,685.14,1462.2,0.0 +gfx950,256,4296,8192,512,ck,0,0,51.0619,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,705.76,1503.66,0.0 +gfx950,256,4488,8192,512,ck,0,0,51.3919,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,732.57,1557.12,0.0 +gfx950,256,734,7168,4096,ck,14,0,52.0695,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,827.75,823.69,0.0 +gfx950,256,272,9216,7168,ck,18,0,52.1304,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,689.36,1400.79,0.0 +gfx950,256,4365,8192,512,ck,18,0,52.133,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,702.36,1495.12,0.0 +gfx950,256,4540,8192,512,ck,0,0,52.3375,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.67,1545.78,0.0 +gfx950,256,4615,8192,512,ck,0,0,52.5736,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.37,1562.94,0.0 +gfx950,256,4616,8192,512,ck,0,0,52.661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.3,1560.67,0.0 +gfx950,256,806,7168,4096,ck,14,0,52.9547,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,893.75,834.98,0.0 +gfx950,256,304,9216,7168,ck,0,0,52.9874,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.0,1393.59,0.0 +gfx950,256,4687,8192,512,ck,0,0,53.0,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.84,1573.32,0.0 +gfx950,256,4661,8192,512,ck,0,0,53.0163,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.5,1564.55,0.0 +gfx950,256,4777,8192,512,ck,0,0,53.0228,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.76,1601.32,0.0 +gfx950,256,4660,8192,512,ck,0,0,53.0292,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.16,1563.85,0.0 +gfx950,256,4720,8192,512,ck,0,0,53.0339,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,746.58,1582.83,0.0 +gfx950,256,4670,8192,512,ck,0,0,53.0549,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,738.38,1566.28,0.0 +gfx950,256,4695,8192,512,ck,0,0,53.084,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.93,1573.37,0.0 +gfx950,256,4620,8192,512,ck,0,0,53.1335,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.4,1548.06,0.0 +gfx950,256,4614,8192,512,ck,0,0,53.1788,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.83,1544.83,0.0 +gfx950,256,4626,8192,512,ck,0,0,53.1812,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.69,1548.58,0.0 +gfx950,256,4728,8192,512,ck,0,0,53.2395,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.96,1579.25,0.0 +gfx950,256,4643,8192,512,ck,0,0,53.2486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,731.44,1552.01,0.0 +gfx950,256,4672,8192,512,ck,0,0,53.2508,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.98,1561.15,0.0 +gfx950,256,4711,8192,512,ck,0,0,53.274,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.8,1572.84,0.0 +gfx950,256,4618,8192,512,ck,0,0,53.2963,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,726.85,1542.7,0.0 +gfx950,256,4637,8192,512,ck,0,0,53.3072,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.69,1548.4,0.0 +gfx950,256,4642,8192,512,ck,0,0,53.3082,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.47,1549.96,0.0 +gfx950,256,4703,8192,512,ck,0,0,53.3538,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.43,1567.95,0.0 +gfx950,256,4645,8192,512,ck,0,0,53.376,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.01,1548.94,0.0 +gfx950,256,830,7168,4096,ck,14,0,53.3798,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,913.04,836.62,0.0 +gfx950,256,4751,8192,512,ck,0,0,53.3999,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,746.34,1581.79,0.0 +gfx950,256,4705,8192,512,ck,0,0,53.4077,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.0,1567.0,0.0 +gfx950,256,4629,8192,512,ck,0,0,53.4319,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,726.74,1542.26,0.0 +gfx950,256,4690,8192,512,ck,0,0,53.432,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.31,1561.55,0.0 +gfx950,256,4832,8192,512,ck,0,0,53.4497,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.35,1605.92,0.0 +gfx950,256,4714,8192,512,ck,0,0,53.4499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.83,1568.61,0.0 +gfx950,256,4852,8192,512,ck,0,0,53.4501,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.49,1612.23,0.0 +gfx950,256,809,7168,4096,ck,14,0,53.4563,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,888.66,828.18,0.0 +gfx950,256,4655,8192,512,ck,0,0,53.4563,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.48,1549.77,0.0 +gfx950,256,4715,8192,512,ck,0,0,53.5068,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.2,1567.26,0.0 +gfx950,256,4640,8192,512,ck,0,0,53.5167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.31,1543.29,0.0 +gfx950,256,4701,8192,512,ck,0,0,53.5281,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.71,1562.21,0.0 +gfx950,256,4726,8192,512,ck,0,0,53.5355,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.53,1569.89,0.0 +gfx950,256,4694,8192,512,ck,0,0,53.5389,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.47,1559.69,0.0 +gfx950,256,4656,8192,512,ck,0,0,53.5488,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.38,1547.41,0.0 +gfx950,256,4708,8192,512,ck,0,0,53.5515,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.49,1563.74,0.0 +gfx950,256,4634,8192,512,ck,0,0,53.5599,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.78,1540.15,0.0 +gfx950,256,4748,8192,512,ck,0,0,53.5651,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.56,1575.96,0.0 +gfx950,256,4834,8192,512,ck,0,0,53.5735,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.91,1602.84,0.0 +gfx950,256,4636,8192,512,ck,0,0,53.6027,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.52,1539.55,0.0 +gfx950,256,4710,8192,512,ck,0,0,53.6035,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.09,1562.85,0.0 +gfx950,256,4794,8192,512,ck,0,0,53.6099,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.14,1589.14,0.0 +gfx950,256,4632,8192,512,ck,0,0,53.6099,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,724.79,1538.08,0.0 +gfx950,256,4666,8192,512,ck,0,0,53.6149,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.04,1548.66,0.0 +gfx950,256,4801,8192,512,ck,0,0,53.6342,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.9,1590.63,0.0 +gfx950,256,4638,8192,512,ck,0,0,53.6556,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.11,1538.66,0.0 +gfx950,256,4628,8192,512,ck,0,0,53.6567,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.53,1535.48,0.0 +gfx950,256,4712,8192,512,ck,0,0,53.6722,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.45,1561.48,0.0 +gfx950,256,4716,8192,512,ck,0,0,53.6725,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.08,1562.73,0.0 +gfx950,256,4821,8192,512,ck,0,0,53.6848,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.31,1595.42,0.0 +gfx950,256,4848,8192,512,ck,0,0,53.7016,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.3,1603.42,0.0 +gfx950,256,4798,8192,512,ck,0,0,53.7046,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.44,1587.6,0.0 +gfx950,256,4669,8192,512,ck,0,0,53.7555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.6,1545.55,0.0 +gfx950,256,4729,8192,512,ck,0,0,53.756,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.96,1564.39,0.0 +gfx950,256,4750,8192,512,ck,0,0,53.7671,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.08,1570.67,0.0 +gfx950,256,4770,8192,512,ck,0,0,53.7771,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.07,1576.66,0.0 +gfx950,256,4818,8192,512,ck,0,0,53.7867,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.42,1591.46,0.0 +gfx950,256,814,7168,4096,ck,14,0,53.7985,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,888.47,824.63,0.0 +gfx950,256,4704,8192,512,ck,0,0,53.8095,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,733.33,1554.99,0.0 +gfx950,256,4792,8192,512,ck,0,0,53.8107,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.03,1582.58,0.0 +gfx950,256,4929,8192,512,ck,0,0,53.8128,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.36,1625.54,0.0 +gfx950,256,4743,8192,512,ck,0,0,53.8149,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.33,1567.08,0.0 +gfx950,256,4773,8192,512,ck,0,0,53.826,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.86,1576.17,0.0 +gfx950,256,4785,8192,512,ck,0,0,53.8492,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.41,1579.26,0.0 +gfx950,256,4745,8192,512,ck,0,0,53.8626,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,738.99,1566.32,0.0 +gfx950,256,4610,8192,512,ck,0,0,53.8627,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,717.96,1523.96,0.0 +gfx950,256,4967,8192,512,ck,0,0,53.88,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.32,1635.43,0.0 +gfx950,256,4763,8192,512,ck,0,0,53.8863,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.47,1571.27,0.0 +gfx950,256,4752,8192,512,ck,0,0,53.8914,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.69,1567.67,0.0 +gfx950,256,4662,8192,512,ck,0,0,53.901,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.55,1539.18,0.0 +gfx950,256,4861,8192,512,ck,0,0,53.9024,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.5,1601.52,0.0 +gfx950,256,4789,8192,512,ck,0,0,53.9075,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.22,1578.8,0.0 +gfx950,256,4902,8192,512,ck,0,0,53.9139,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.72,1614.03,0.0 +gfx950,256,2711,6144,1536,ck,0,0,53.9139,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,949.08,870.17,0.0 +gfx950,256,877,7168,4096,ck,0,0,53.9274,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,954.94,844.19,0.0 +gfx950,256,4768,8192,512,ck,0,0,53.9299,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.65,1571.57,0.0 +gfx950,256,4758,8192,512,ck,0,0,53.932,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.06,1568.37,0.0 +gfx950,256,4760,8192,512,ck,0,0,53.9373,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.3,1568.85,0.0 +gfx950,256,4858,8192,512,ck,0,0,53.9499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.36,1599.17,0.0 +gfx950,256,4793,8192,512,ck,0,0,53.9509,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.24,1578.78,0.0 +gfx950,256,4664,8192,512,ck,0,0,53.9669,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,724.97,1537.93,0.0 +gfx950,256,4689,8192,512,ck,0,0,53.9682,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,728.84,1545.72,0.0 +gfx950,256,4679,8192,512,ck,0,0,53.97,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.26,1542.54,0.0 +gfx950,256,4791,8192,512,ck,0,0,53.9884,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.42,1577.06,0.0 +gfx950,256,813,7168,4096,ck,14,0,53.9916,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,884.2,821.34,0.0 +gfx950,256,4859,8192,512,ck,0,0,53.9971,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.86,1598.09,0.0 +gfx950,256,4807,8192,512,ck,0,0,54.0012,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,746.72,1581.69,0.0 +gfx950,256,4799,8192,512,ck,0,0,54.0175,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.26,1578.71,0.0 +gfx950,256,4707,8192,512,ck,0,0,54.0319,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.78,1549.52,0.0 +gfx950,256,4700,8192,512,ck,0,0,54.0328,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.68,1547.31,0.0 +gfx950,256,4853,8192,512,ck,0,0,54.0347,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.4,1595.1,0.0 +gfx950,256,4927,8192,512,ck,0,0,54.0613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.51,1617.44,0.0 +gfx950,256,4847,8192,512,ck,0,0,54.0671,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.02,1592.27,0.0 +gfx950,256,4924,8192,512,ck,0,0,54.0751,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.85,1616.09,0.0 +gfx950,256,4804,8192,512,ck,0,0,54.0847,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.11,1578.31,0.0 +gfx950,256,4952,8192,512,ck,0,0,54.0947,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.92,1624.25,0.0 +gfx950,256,4884,8192,512,ck,0,0,54.1163,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.07,1602.37,0.0 +gfx950,256,4976,8192,512,ck,0,0,54.1207,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.27,1630.96,0.0 +gfx950,256,4784,8192,512,ck,0,0,54.1214,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.5,1571.0,0.0 +gfx950,256,4968,8192,512,ck,0,0,54.1288,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.92,1628.22,0.0 +gfx950,256,4780,8192,512,ck,0,0,54.1348,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.7,1569.36,0.0 +gfx950,256,4774,8192,512,ck,0,0,54.1583,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.45,1566.81,0.0 +gfx950,256,4909,8192,512,ck,0,0,54.1589,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.35,1608.91,0.0 +gfx950,256,910,7168,4096,ck,0,0,54.1653,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,986.53,851.71,0.0 +gfx950,256,4835,8192,512,ck,0,0,54.1766,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.64,1585.31,0.0 +gfx950,256,4984,8192,512,ck,0,0,54.1876,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.56,1631.44,0.0 +gfx950,256,4940,8192,512,ck,0,0,54.2155,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.35,1616.89,0.0 +gfx950,256,4962,8192,512,ck,0,0,54.2245,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.63,1623.48,0.0 +gfx950,256,4740,8192,512,ck,0,0,54.2275,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,733.24,1554.22,0.0 +gfx950,256,4918,8192,512,ck,0,0,54.2298,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.75,1609.61,0.0 +gfx950,256,4841,8192,512,ck,0,0,54.2351,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.76,1585.46,0.0 +gfx950,256,336,9216,7168,ck,0,0,54.2497,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.3,1376.26,0.0 +gfx950,256,4796,8192,512,ck,0,0,54.2727,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.29,1570.36,0.0 +gfx950,256,4892,8192,512,ck,0,0,54.2732,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.12,1600.23,0.0 +gfx950,256,2714,6144,1536,ck,0,0,54.2762,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,943.78,865.12,0.0 +gfx950,256,4890,8192,512,ck,0,0,54.2803,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.71,1599.4,0.0 +gfx950,256,4907,8192,512,ck,0,0,54.2835,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.29,1604.59,0.0 +gfx950,256,4826,8192,512,ck,0,0,54.2843,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.77,1579.36,0.0 +gfx950,256,902,7168,4096,ck,0,0,54.2882,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,975.64,847.07,0.0 +gfx950,256,4938,8192,512,ck,0,0,54.2901,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.99,1614.05,0.0 +gfx950,256,4917,8192,512,ck,0,0,54.3135,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.42,1606.82,0.0 +gfx950,256,4738,8192,512,ck,0,0,54.33,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,731.55,1550.66,0.0 +gfx950,256,4843,8192,512,ck,0,0,54.3315,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.74,1583.27,0.0 +gfx950,256,2784,6144,1536,ck,0,0,54.3348,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,967.08,882.0,0.0 +gfx950,256,4943,8192,512,ck,0,0,54.3471,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.96,1613.91,0.0 +gfx950,256,4844,8192,512,ck,0,0,54.3474,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.68,1583.12,0.0 +gfx950,256,856,7168,4096,ck,0,0,54.3681,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,924.52,830.23,0.0 +gfx950,256,2748,6144,1536,ck,0,0,54.3682,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,953.99,872.3,0.0 +gfx950,256,4928,8192,512,ck,0,0,54.3693,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.34,1608.59,0.0 +gfx950,256,4959,8192,512,ck,0,0,54.3819,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.94,1617.85,0.0 +gfx950,256,4925,8192,512,ck,0,0,54.3829,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.69,1607.25,0.0 +gfx950,256,2746,6144,1536,ck,0,0,54.3952,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,952.82,871.36,0.0 +gfx950,256,4894,8192,512,ck,0,0,54.4011,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.65,1597.09,0.0 +gfx950,256,4899,8192,512,ck,0,0,54.4196,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.17,1598.1,0.0 +gfx950,256,4948,8192,512,ck,0,0,54.4228,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.67,1613.22,0.0 +gfx950,256,4830,8192,512,ck,0,0,54.4315,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.37,1576.33,0.0 +gfx950,256,4965,8192,512,ck,0,0,54.4511,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.9,1617.65,0.0 +gfx950,256,2747,6144,1536,ck,0,0,54.4593,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,952.05,870.59,0.0 +gfx950,256,2702,6144,1536,ck,0,0,54.468,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,936.3,859.03,0.0 +gfx950,256,2694,6144,1536,ck,0,0,54.4791,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,933.34,856.82,0.0 +gfx950,256,4964,8192,512,ck,0,0,54.4919,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.17,1616.13,0.0 +gfx950,256,734,7168,4608,ck,0,0,54.4966,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,889.75,861.25,0.0 +gfx950,256,4889,8192,512,ck,0,0,54.5155,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.3,1592.19,0.0 +gfx950,256,4895,8192,512,ck,0,0,54.5219,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.13,1593.86,0.0 +gfx950,256,4885,8192,512,ck,0,0,54.5524,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.17,1589.87,0.0 +gfx950,256,4969,8192,512,ck,0,0,54.5536,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.07,1615.85,0.0 +gfx950,256,4810,8192,512,ck,0,0,54.5582,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.56,1566.48,0.0 +gfx950,256,2734,6144,1536,ck,0,0,54.5708,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,945.61,865.52,0.0 +gfx950,256,920,7168,4096,ck,0,0,54.6092,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,989.26,848.16,0.0 +gfx950,256,4915,8192,512,ck,0,0,54.6131,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.95,1597.38,0.0 +gfx950,256,4898,8192,512,ck,0,0,54.6283,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.13,1591.68,0.0 +gfx950,256,4856,8192,512,ck,0,0,54.6472,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.42,1578.15,0.0 +gfx950,256,2109,8192,1536,ck,0,0,54.6871,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,970.52,921.17,0.0 +gfx950,256,4904,8192,512,ck,0,0,54.6953,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.13,1591.59,0.0 +gfx950,256,2713,6144,1536,ck,0,0,54.6983,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,936.16,858.19,0.0 +gfx950,256,2740,6144,1536,ck,0,0,54.6983,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,945.47,865.02,0.0 +gfx950,256,5046,8192,512,ck,0,0,54.7135,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.65,1634.91,0.0 +gfx950,256,4879,8192,512,ck,0,0,54.7183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.98,1583.2,0.0 +gfx950,256,4839,8192,512,ck,0,0,54.7284,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.71,1570.56,0.0 +gfx950,256,4836,8192,512,ck,0,0,54.7379,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.12,1569.36,0.0 +gfx950,256,4905,8192,512,ck,0,0,54.7498,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.53,1590.31,0.0 +gfx950,256,4991,8192,512,ck,0,0,54.7848,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.22,1615.82,0.0 +gfx950,256,5038,8192,512,ck,0,0,54.8082,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.09,1629.62,0.0 +gfx950,256,4881,8192,512,ck,0,0,54.8098,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.03,1581.17,0.0 +gfx950,256,2087,8192,1536,ck,0,0,54.8237,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,958.0,911.69,0.0 +gfx950,256,5055,8192,512,ck,0,0,54.8267,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.43,1634.31,0.0 +gfx950,256,5008,8192,512,ck,0,0,54.9131,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.03,1617.27,0.0 +gfx950,256,2085,8192,1536,ck,0,0,54.9265,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,955.29,909.33,0.0 +gfx950,256,4995,8192,512,ck,0,0,54.9595,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.4,1611.91,0.0 +gfx950,256,2088,8192,1536,ck,0,0,54.9772,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,955.78,909.47,0.0 +gfx950,256,2794,6144,1536,ck,0,0,54.9775,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,959.21,874.2,0.0 +gfx950,256,5107,8192,512,ck,0,0,54.9828,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.16,1645.65,0.0 +gfx950,256,5051,8192,512,ck,0,0,54.9999,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.38,1627.93,0.0 +gfx950,256,5025,8192,512,ck,0,0,55.0067,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.32,1619.74,0.0 +gfx950,256,4926,8192,512,ck,0,0,55.0079,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.21,1589.3,0.0 +gfx950,256,5069,8192,512,ck,0,0,55.0406,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.55,1632.25,0.0 +gfx950,256,5009,8192,512,ck,0,0,55.0495,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.29,1613.57,0.0 +gfx950,256,2909,6144,1536,ck,0,0,55.061,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,997.18,901.75,0.0 +gfx950,256,5033,8192,512,ck,0,0,55.0759,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.58,1620.16,0.0 +gfx950,256,5058,8192,512,ck,0,0,55.0868,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.23,1627.51,0.0 +gfx950,256,5034,8192,512,ck,0,0,55.1111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.24,1619.43,0.0 +gfx950,256,2054,8192,1536,ck,0,0,55.1295,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,937.62,895.9,0.0 +gfx950,256,5057,8192,512,ck,0,0,55.1479,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.23,1625.4,0.0 +gfx950,256,5024,8192,512,ck,0,0,55.1539,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.12,1615.11,0.0 +gfx950,256,5099,8192,512,ck,0,0,55.1608,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.43,1637.88,0.0 +gfx950,256,849,7168,4096,ck,0,0,55.1692,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,903.65,815.83,0.0 +gfx950,256,2729,6144,1536,ck,0,0,55.1724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,933.59,854.83,0.0 +gfx950,256,5017,8192,512,ck,0,0,55.1815,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.68,1612.16,0.0 +gfx950,256,2842,6144,1536,ck,0,0,55.206,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,971.65,882.6,0.0 +gfx950,256,5052,8192,512,ck,0,0,55.2331,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.28,1621.36,0.0 +gfx950,256,4873,8192,512,ck,0,0,55.2399,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.0,1566.41,0.0 +gfx950,256,5103,8192,512,ck,0,0,55.2638,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.6,1636.05,0.0 +gfx950,256,4950,8192,512,ck,0,0,55.2707,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.28,1589.08,0.0 +gfx950,256,2091,8192,1536,ck,0,0,55.2851,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,951.82,905.37,0.0 +gfx950,256,2150,8192,1536,ck,0,0,55.3091,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,978.26,924.1,0.0 +gfx950,256,2841,6144,1536,ck,0,0,55.3348,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,969.05,880.3,0.0 +gfx950,256,5043,8192,512,ck,0,0,55.3425,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.4,1615.41,0.0 +gfx950,256,2056,8192,1536,ck,0,0,55.3426,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,934.92,893.1,0.0 +gfx950,256,2757,6144,1536,ck,0,0,55.3597,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,939.97,858.93,0.0 +gfx950,256,5109,8192,512,ck,0,0,55.3628,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.12,1634.96,0.0 +gfx950,256,5019,8192,512,ck,0,0,55.3671,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.42,1607.37,0.0 +gfx950,256,869,7168,4096,ck,0,0,55.3857,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,921.32,819.3,0.0 +gfx950,256,4911,8192,512,ck,0,0,55.386,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.81,1573.87,0.0 +gfx950,256,2112,8192,1536,ck,0,0,55.3987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,959.41,910.31,0.0 +gfx950,256,2090,8192,1536,ck,0,0,55.4059,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,949.3,903.08,0.0 +gfx950,256,3006,6144,1536,ck,0,0,55.406,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1024.01,920.34,0.0 +gfx950,256,5094,8192,512,ck,0,0,55.4127,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.15,1628.91,0.0 +gfx950,256,5036,8192,512,ck,0,0,55.4312,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.12,1610.69,0.0 +gfx950,256,5076,8192,512,ck,0,0,55.4719,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.61,1621.69,0.0 +gfx950,256,2889,6144,1536,ck,0,0,55.4729,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,982.97,890.07,0.0 +gfx950,256,2128,8192,1536,ck,0,0,55.4742,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,965.37,914.24,0.0 +gfx950,256,2111,8192,1536,ck,0,0,55.5033,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,957.15,908.27,0.0 +gfx950,256,2166,8192,1536,ck,0,0,55.5187,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,981.82,925.77,0.0 +gfx950,256,5031,8192,512,ck,0,0,55.5247,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.08,1606.46,0.0 +gfx950,256,2148,8192,1536,ck,0,0,55.5267,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,973.52,919.83,0.0 +gfx950,256,2127,8192,1536,ck,0,0,55.5319,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,963.91,912.97,0.0 +gfx950,256,2129,8192,1536,ck,0,0,55.5811,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,963.96,912.8,0.0 +gfx950,256,2219,8192,1536,ck,0,0,55.59,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1004.55,941.67,0.0 +gfx950,256,2149,8192,1536,ck,0,0,55.5955,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,972.76,919.01,0.0 +gfx950,256,5085,8192,512,ck,0,0,55.6097,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.06,1620.41,0.0 +gfx950,256,2146,8192,1536,ck,0,0,55.636,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,970.7,917.38,0.0 +gfx950,256,4999,8192,512,ck,0,0,55.6432,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.63,1593.32,0.0 +gfx950,256,5105,8192,512,ck,0,0,55.6512,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.5,1625.27,0.0 +gfx950,256,930,7168,4096,ck,0,0,55.6564,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,981.2,835.52,0.0 +gfx950,256,3004,6144,1536,ck,0,0,55.66,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1018.66,915.64,0.0 +gfx950,256,2861,6144,1536,ck,0,0,55.6645,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,970.09,880.05,0.0 +gfx950,256,5088,8192,512,ck,0,0,55.6738,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.63,1619.45,0.0 +gfx950,256,5026,8192,512,ck,0,0,55.6751,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.27,1600.6,0.0 +gfx950,256,2164,8192,1536,ck,0,0,55.6799,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,978.07,922.45,0.0 +gfx950,256,5097,8192,512,ck,0,0,55.6799,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.9,1622.01,0.0 +gfx950,256,3001,6144,1536,ck,0,0,55.6812,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1017.25,914.55,0.0 +gfx950,256,2843,6144,1536,ck,0,0,55.6903,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,963.54,875.18,0.0 +gfx950,256,5007,8192,512,ck,0,0,55.6911,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.19,1594.38,0.0 +gfx950,256,2160,8192,1536,ck,0,0,55.7035,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,975.85,920.77,0.0 +gfx950,256,5102,8192,512,ck,0,0,55.706,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.3,1622.76,0.0 +gfx950,256,5030,8192,512,ck,0,0,55.7217,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.24,1600.47,0.0 +gfx950,256,2189,8192,1536,ck,0,0,55.7221,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,988.62,929.79,0.0 +gfx950,256,2101,8192,1536,ck,0,0,55.7226,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,948.87,901.48,0.0 +gfx950,256,5029,8192,512,ck,0,0,55.7325,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.94,1599.86,0.0 +gfx950,256,2126,8192,1536,ck,0,0,55.7335,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,959.97,909.34,0.0 +gfx950,256,5049,8192,512,ck,0,0,55.7348,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.92,1605.86,0.0 +gfx950,256,5022,8192,512,ck,0,0,55.7417,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.76,1597.48,0.0 +gfx950,256,3005,6144,1536,ck,0,0,55.7621,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1017.13,914.21,0.0 +gfx950,256,5092,8192,512,ck,0,0,55.7656,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.97,1618.0,0.0 +gfx950,256,2945,6144,1536,ck,0,0,55.7728,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,996.63,899.16,0.0 +gfx950,256,5020,8192,512,ck,0,0,55.7983,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.7,1595.25,0.0 +gfx950,256,3019,6144,1536,ck,0,0,55.8279,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1020.67,916.6,0.0 +gfx950,256,2116,8192,1536,ck,0,0,55.8316,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,953.78,904.53,0.0 +gfx950,256,5004,8192,512,ck,0,0,55.8331,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.82,1589.41,0.0 +gfx950,256,2186,8192,1536,ck,0,0,55.8495,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,985.01,926.71,0.0 +gfx950,256,5104,8192,512,ck,0,0,55.8803,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.2,1618.31,0.0 +gfx950,256,5110,8192,512,ck,0,0,55.8857,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.03,1619.96,0.0 +gfx950,256,2086,8192,1536,ck,0,0,55.894,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,939.2,893.91,0.0 +gfx950,256,5039,8192,512,ck,0,0,55.8959,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.23,1598.21,0.0 +gfx950,256,2254,8192,1536,ck,0,0,55.8991,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1014.75,947.68,0.0 +gfx950,256,2304,8192,1536,ck,0,0,55.9028,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1037.19,963.65,0.0 +gfx950,256,5011,8192,512,ck,0,0,55.9156,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.76,1589.18,0.0 +gfx950,256,2215,8192,1536,ck,0,0,55.9183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,996.85,934.86,0.0 +gfx950,256,5071,8192,512,ck,0,0,55.9191,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.72,1607.21,0.0 +gfx950,256,1009,7168,4096,ck,0,0,55.9258,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1059.42,857.53,0.0 +gfx950,256,2185,8192,1536,ck,0,0,55.9443,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,982.89,924.81,0.0 +gfx950,256,5108,8192,512,ck,0,0,55.9604,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.7,1617.2,0.0 +gfx950,256,5119,8192,512,ck,0,0,55.9607,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.35,1620.51,0.0 +gfx950,256,2163,8192,1536,ck,0,0,55.9667,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,972.61,917.4,0.0 +gfx950,256,2251,8192,1536,ck,0,0,56.0023,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1011.53,944.98,0.0 +gfx950,256,5095,8192,512,ck,0,0,56.0055,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.14,1611.97,0.0 +gfx950,256,2161,8192,1536,ck,0,0,56.0079,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,970.99,916.09,0.0 +gfx950,256,2946,6144,1536,ck,0,0,56.0153,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,992.66,895.52,0.0 +gfx950,256,2130,8192,1536,ck,0,0,56.0609,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,956.16,905.31,0.0 +gfx950,256,2226,8192,1536,ck,0,0,56.0786,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,998.94,935.7,0.0 +gfx950,256,2268,8192,1536,ck,0,0,56.1091,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1017.23,948.61,0.0 +gfx950,256,1003,7168,4096,ck,0,0,56.1207,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1049.46,852.58,0.0 +gfx950,256,5065,8192,512,ck,0,0,56.1395,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.83,1599.1,0.0 +gfx950,256,830,7168,4608,ck,0,0,56.2103,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,975.44,867.34,0.0 +gfx950,256,2217,8192,1536,ck,0,0,56.2146,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,992.49,930.57,0.0 +gfx950,256,1000,7168,4096,ck,0,0,56.2344,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1044.21,849.87,0.0 +gfx950,256,368,9216,7168,ck,0,0,56.2365,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,864.57,1342.21,0.0 +gfx950,256,1007,7168,4096,ck,0,0,56.2479,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1051.26,851.96,0.0 +gfx950,256,5061,8192,512,ck,0,0,56.2536,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.7,1594.65,0.0 +gfx950,256,3021,6144,1536,ck,0,0,56.3261,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1012.31,908.98,0.0 +gfx950,256,5001,8192,512,ck,0,0,56.3575,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.38,1573.72,0.0 +gfx950,256,1023,7168,4096,ck,0,0,56.3978,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1065.13,854.93,0.0 +gfx950,256,856,7168,4608,ck,0,0,56.4124,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1002.4,872.97,0.0 +gfx950,256,814,7168,4608,ck,0,0,56.4473,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,952.62,858.33,0.0 +gfx950,256,4994,8192,512,ck,0,0,56.4532,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,742.08,1568.96,0.0 +gfx950,256,2918,6144,1536,ck,0,0,56.4609,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,975.46,881.59,0.0 +gfx950,256,3000,6144,1536,ck,0,0,56.4672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1002.76,901.57,0.0 +gfx950,256,2964,6144,1536,ck,0,0,56.5025,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,990.11,892.2,0.0 +gfx950,256,5012,8192,512,ck,0,0,56.5359,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.66,1572.05,0.0 +gfx950,256,2947,6144,1536,ck,0,0,56.5853,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,982.99,886.74,0.0 +gfx950,256,2216,8192,1536,ck,0,0,56.6007,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,985.28,923.9,0.0 +gfx950,256,2876,6144,1536,ck,0,0,56.6042,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,958.99,869.11,0.0 +gfx950,256,2188,8192,1536,ck,0,0,56.6311,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,972.31,914.55,0.0 +gfx950,256,809,7168,4608,ck,0,0,56.6401,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,943.55,853.74,0.0 +gfx950,256,5064,8192,512,ck,0,0,56.7147,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.01,1582.58,0.0 +gfx950,256,813,7168,4608,ck,0,0,56.752,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,946.35,853.39,0.0 +gfx950,256,4761,8192,512,ck,18,0,56.766,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,703.56,1490.97,0.0 +gfx950,256,2207,8192,1536,ck,0,0,56.7735,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,978.29,918.25,0.0 +gfx950,256,2250,8192,1536,ck,0,0,56.8375,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,996.23,930.77,0.0 +gfx950,256,2955,6144,1536,ck,0,0,56.8389,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,981.26,884.73,0.0 +gfx950,256,1001,7168,4096,ck,0,0,56.8615,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1033.72,840.82,0.0 +gfx950,256,2249,8192,1536,ck,0,0,56.9771,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,993.35,928.18,0.0 +gfx950,256,2248,8192,1536,ck,0,0,57.0243,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,992.08,927.1,0.0 +gfx950,256,849,7168,4608,ck,0,0,57.0264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,983.5,861.24,0.0 +gfx950,256,2390,8192,1536,ck,0,0,57.06,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1054.09,971.11,0.0 +gfx950,256,1015,7168,4096,ck,0,0,57.1459,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1042.96,841.16,0.0 +gfx950,256,2369,8192,1536,ck,0,0,57.1547,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1043.1,962.92,0.0 +gfx950,256,3138,6144,1536,ck,0,0,57.1719,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1035.96,923.83,0.0 +gfx950,256,3131,6144,1536,ck,0,0,57.2208,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1032.77,921.35,0.0 +gfx950,256,2403,8192,1536,ck,0,0,57.2235,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1056.79,972.41,0.0 +gfx950,256,2378,8192,1536,ck,0,0,57.2388,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1045.52,964.32,0.0 +gfx950,256,8192,512,7168,ck,0,0,57.2725,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1049.89,1235.83,0.0 +gfx950,256,806,7168,4608,ck,0,0,57.273,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,929.66,843.31,0.0 +gfx950,256,2365,8192,1536,ck,0,0,57.3069,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1038.57,959.11,0.0 +gfx950,256,2282,8192,1536,ck,0,0,57.3199,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1001.89,932.95,0.0 +gfx950,256,2301,8192,1536,ck,0,0,57.3503,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1009.7,938.39,0.0 +gfx950,256,2235,8192,1536,ck,0,0,57.4197,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,979.55,916.66,0.0 +gfx950,256,2387,8192,1536,ck,0,0,57.4219,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1046.13,964.06,0.0 +gfx950,256,2952,6144,1536,ck,0,0,57.4389,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,970.02,874.77,0.0 +gfx950,256,5181,8192,512,ck,0,0,57.4615,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.36,1596.42,0.0 +gfx950,256,2368,8192,1536,ck,0,0,57.5075,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1036.26,956.7,0.0 +gfx950,256,5137,8192,512,ck,0,0,57.5412,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.89,1581.29,0.0 +gfx950,256,869,7168,4608,ck,0,0,57.5736,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,997.1,859.64,0.0 +gfx950,256,5154,8192,512,ck,0,0,57.5761,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.92,1585.32,0.0 +gfx950,256,2252,8192,1536,ck,0,0,57.5822,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,984.22,919.36,0.0 +gfx950,256,2415,8192,1536,ck,0,0,57.5839,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1055.42,970.06,0.0 +gfx950,256,910,7168,4608,ck,0,0,57.5955,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1043.74,872.8,0.0 +gfx950,256,2412,8192,1536,ck,0,0,57.6016,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1053.79,968.83,0.0 +gfx950,256,2344,8192,1536,ck,0,0,57.6196,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1023.76,947.38,0.0 +gfx950,256,5101,8192,512,ck,0,0,57.6506,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,742.23,1567.73,0.0 +gfx950,256,5200,8192,512,ck,0,0,57.7071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.9,1595.19,0.0 +gfx950,256,1047,7168,4096,ck,0,0,57.7162,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1065.21,843.06,0.0 +gfx950,256,5153,8192,512,ck,0,0,57.7831,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.08,1579.34,0.0 +gfx950,256,5247,8192,512,ck,0,0,57.8088,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.39,1606.12,0.0 +gfx950,256,1040,7168,4096,ck,0,0,57.8103,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1056.37,839.46,0.0 +gfx950,256,5238,8192,512,ck,0,0,57.8227,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.9,1603.1,0.0 +gfx950,256,5197,8192,512,ck,0,0,57.8329,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.82,1590.84,0.0 +gfx950,256,5172,8192,512,ck,0,0,57.8547,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.91,1582.94,0.0 +gfx950,256,5142,8192,512,ck,0,0,57.8687,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.38,1573.8,0.0 +gfx950,256,5144,8192,512,ck,0,0,57.9027,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.23,1573.46,0.0 +gfx950,256,2356,8192,1536,ck,0,0,57.9115,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1023.82,946.31,0.0 +gfx950,256,5140,8192,512,ck,0,0,57.9308,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.29,1571.53,0.0 +gfx950,256,5151,8192,512,ck,0,0,58.001,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.98,1572.83,0.0 +gfx950,256,5175,8192,512,ck,0,0,58.0106,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.33,1579.56,0.0 +gfx950,256,5209,8192,512,ck,0,0,58.0136,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.21,1589.38,0.0 +gfx950,256,3324,6144,1536,ck,0,0,58.0268,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1081.2,954.53,0.0 +gfx950,256,5234,8192,512,ck,0,0,58.0461,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.4,1595.77,0.0 +gfx950,256,5225,8192,512,ck,0,0,58.0623,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.89,1592.7,0.0 +gfx950,256,5241,8192,512,ck,0,0,58.0631,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.19,1597.34,0.0 +gfx950,256,5180,8192,512,ck,0,0,58.0738,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.24,1579.29,0.0 +gfx950,256,877,7168,4608,ck,0,0,58.1111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,996.97,854.3,0.0 +gfx950,256,5000,8192,512,ck,0,0,58.1346,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,721.48,1525.33,0.0 +gfx950,256,5235,8192,512,ck,0,0,58.1563,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.11,1593.03,0.0 +gfx950,256,902,7168,4608,ck,0,0,58.2213,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1023.45,860.81,0.0 +gfx950,256,5193,8192,512,ck,0,0,58.2326,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.07,1578.76,0.0 +gfx950,256,5226,8192,512,ck,0,0,58.2759,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.26,1587.15,0.0 +gfx950,256,5139,8192,512,ck,0,0,58.2991,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.45,1561.31,0.0 +gfx950,256,5352,8192,512,ck,0,0,58.3097,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.95,1622.74,0.0 +gfx950,256,5376,8192,512,ck,0,0,58.327,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.18,1629.21,0.0 +gfx950,256,2355,8192,1536,ck,0,0,58.3399,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1015.87,939.06,0.0 +gfx950,256,5187,8192,512,ck,0,0,58.3471,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.74,1573.92,0.0 +gfx950,256,5237,8192,512,ck,0,0,58.3519,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.87,1588.27,0.0 +gfx950,256,2445,8192,1536,ck,0,0,58.3535,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1054.44,966.48,0.0 +gfx950,256,3258,6144,1536,ck,0,0,58.3556,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1053.76,933.51,0.0 +gfx950,256,1087,7168,4096,ck,0,0,58.3592,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1093.73,846.41,0.0 +gfx950,256,5230,8192,512,ck,0,0,58.3627,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.72,1585.95,0.0 +gfx950,256,2441,8192,1536,ck,0,0,58.3683,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1052.45,965.0,0.0 +gfx950,256,5345,8192,512,ck,0,0,58.3951,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.82,1618.35,0.0 +gfx950,256,5223,8192,512,ck,0,0,58.3987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.25,1582.95,0.0 +gfx950,256,5190,8192,512,ck,0,0,58.4319,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.09,1572.51,0.0 +gfx950,256,2486,8192,1536,ck,0,0,58.438,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1070.57,977.65,0.0 +gfx950,256,5194,8192,512,ck,0,0,58.4447,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.5,1573.32,0.0 +gfx950,256,5186,8192,512,ck,0,0,58.4467,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.32,1570.95,0.0 +gfx950,256,5358,8192,512,ck,0,0,58.4547,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.91,1620.45,0.0 +gfx950,256,5221,8192,512,ck,0,0,58.4573,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.21,1580.78,0.0 +gfx950,256,2443,8192,1536,ck,0,0,58.4576,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1051.7,964.14,0.0 +gfx950,256,2515,8192,1536,ck,0,0,58.4583,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1082.69,986.2,0.0 +gfx950,256,5321,8192,512,ck,0,0,58.4676,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.43,1609.4,0.0 +gfx950,256,5243,8192,512,ck,0,0,58.4727,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.17,1586.72,0.0 +gfx950,256,5170,8192,512,ck,0,0,58.4808,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.6,1565.41,0.0 +gfx950,256,5231,8192,512,ck,0,0,58.4855,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.29,1582.91,0.0 +gfx950,256,5246,8192,512,ck,0,0,58.4961,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.3,1586.96,0.0 +gfx950,256,5127,8192,512,ck,0,0,58.499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.2,1552.51,0.0 +gfx950,256,5236,8192,512,ck,0,0,58.5055,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.75,1583.81,0.0 +gfx950,256,2358,8192,1536,ck,0,0,58.5082,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1014.23,937.27,0.0 +gfx950,256,5155,8192,512,ck,0,0,58.526,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,738.87,1559.87,0.0 +gfx950,256,5356,8192,512,ck,0,0,58.5264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.68,1617.89,0.0 +gfx950,256,920,7168,4608,ck,0,0,58.5307,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1038.35,862.09,0.0 +gfx950,256,2464,8192,1536,ck,0,0,58.572,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1058.67,968.68,0.0 +gfx950,256,5159,8192,512,ck,0,0,58.574,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,738.84,1559.75,0.0 +gfx950,256,5256,8192,512,ck,0,0,58.5788,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.67,1587.6,0.0 +gfx950,256,5207,8192,512,ck,0,0,58.5831,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.6,1573.35,0.0 +gfx950,256,5141,8192,512,ck,0,0,58.6031,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.9,1553.79,0.0 +gfx950,256,1112,7168,4096,ck,0,0,58.6105,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1114.08,850.64,0.0 +gfx950,256,5313,8192,512,ck,0,0,58.6452,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.97,1602.22,0.0 +gfx950,256,2544,8192,1536,ck,0,0,58.664,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1091.33,991.6,0.0 +gfx950,256,1001,7168,4608,ck,0,0,58.7007,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1126.5,885.73,0.0 +gfx950,256,5363,8192,512,ck,0,0,58.7066,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.32,1614.94,0.0 +gfx950,256,5145,8192,512,ck,0,0,58.7511,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,734.61,1551.02,0.0 +gfx950,256,5354,8192,512,ck,0,0,58.7588,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.36,1610.92,0.0 +gfx950,256,5289,8192,512,ck,0,0,58.8604,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.77,1589.48,0.0 +gfx950,256,5122,8192,512,ck,0,0,58.8675,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.88,1541.35,0.0 +gfx950,256,5355,8192,512,ck,0,0,58.9004,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.66,1607.33,0.0 +gfx950,256,5315,8192,512,ck,0,0,58.913,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.8,1595.51,0.0 +gfx950,256,5227,8192,512,ck,0,0,58.9132,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.27,1570.27,0.0 +gfx950,256,2444,8192,1536,ck,0,0,58.9288,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1043.72,956.74,0.0 +gfx950,256,5286,8192,512,ck,0,0,58.9636,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.03,1585.84,0.0 +gfx950,256,5536,8192,512,ck,0,0,59.0,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.11,1656.45,0.0 +gfx950,256,5362,8192,512,ck,0,0,59.0032,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.33,1606.53,0.0 +gfx950,256,5553,8192,512,ck,0,0,59.0052,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.45,1661.17,0.0 +gfx950,256,5297,8192,512,ck,0,0,59.0355,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.67,1587.05,0.0 +gfx950,256,5365,8192,512,ck,0,0,59.054,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.1,1606.01,0.0 +gfx950,256,2479,8192,1536,ck,0,0,59.0756,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1056.04,964.98,0.0 +gfx950,256,5632,8192,512,ck,0,0,59.0767,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.72,1681.76,0.0 +gfx950,256,5343,8192,512,ck,0,0,59.0911,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.5,1598.71,0.0 +gfx950,256,1009,7168,4608,ck,0,0,59.0976,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1127.88,882.35,0.0 +gfx950,256,5344,8192,512,ck,0,0,59.1144,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.34,1598.37,0.0 +gfx950,256,5587,8192,512,ck,0,0,59.1156,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.81,1667.79,0.0 +gfx950,256,5329,8192,512,ck,0,0,59.1238,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.09,1593.83,0.0 +gfx950,256,5353,8192,512,ck,0,0,59.1288,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.43,1600.55,0.0 +gfx950,256,930,7168,4608,ck,0,0,59.1301,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1039.0,856.55,0.0 +gfx950,256,5301,8192,512,ck,0,0,59.1341,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.99,1585.55,0.0 +gfx950,256,5311,8192,512,ck,0,0,59.1376,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.36,1588.31,0.0 +gfx950,256,5253,8192,512,ck,0,0,59.1744,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.67,1570.76,0.0 +gfx950,256,1000,7168,4608,ck,0,0,59.1863,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1116.14,878.14,0.0 +gfx950,256,5320,8192,512,ck,0,0,59.2234,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.54,1588.58,0.0 +gfx950,256,5405,8192,512,ck,0,0,59.2268,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.54,1612.74,0.0 +gfx950,256,5373,8192,512,ck,0,0,59.2436,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.79,1603.15,0.0 +gfx950,256,1144,7168,4096,ck,0,0,59.246,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1133.85,851.47,0.0 +gfx950,256,1007,7168,4608,ck,0,0,59.2724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1122.32,879.11,0.0 +gfx950,256,5462,8192,512,ck,0,0,59.3119,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.5,1626.66,0.0 +gfx950,256,5454,8192,512,ck,0,0,59.3363,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.05,1623.71,0.0 +gfx950,256,1023,7168,4608,ck,0,0,59.3412,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1138.83,883.2,0.0 +gfx950,256,1132,7168,4096,ck,0,0,59.3628,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1119.75,846.07,0.0 +gfx950,256,5413,8192,512,ck,0,0,59.3686,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.84,1611.16,0.0 +gfx950,256,1015,7168,4608,ck,0,0,59.408,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1128.66,879.65,0.0 +gfx950,256,5498,8192,512,ck,0,0,59.4238,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.13,1633.83,0.0 +gfx950,256,5437,8192,512,ck,0,0,59.4338,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.39,1616.22,0.0 +gfx950,256,5574,8192,512,ck,0,0,59.4358,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.7,1655.11,0.0 +gfx950,256,5285,8192,512,ck,0,0,59.438,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.88,1572.89,0.0 +gfx950,256,5461,8192,512,ck,0,0,59.4779,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.21,1621.84,0.0 +gfx950,256,5367,8192,512,ck,0,0,59.4964,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.71,1594.64,0.0 +gfx950,256,5608,8192,512,ck,0,0,59.5006,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.64,1662.96,0.0 +gfx950,256,5121,8192,512,ck,0,0,59.5148,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,721.8,1524.31,0.0 +gfx950,256,2548,8192,1536,ck,0,0,59.5327,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1077.1,978.34,0.0 +gfx950,256,5258,8192,512,ck,0,0,59.5356,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.86,1562.65,0.0 +gfx950,256,5566,8192,512,ck,0,0,59.5456,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.12,1649.79,0.0 +gfx950,256,5309,8192,512,ck,0,0,59.5596,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.74,1576.49,0.0 +gfx950,256,5538,8192,512,ck,0,0,59.5599,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.99,1641.45,0.0 +gfx950,256,5501,8192,512,ck,0,0,59.5669,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.69,1630.76,0.0 +gfx950,256,400,9216,7168,ck,14,0,59.6141,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,886.51,1279.9,0.0 +gfx950,256,5482,8192,512,ck,0,0,59.6339,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.14,1623.54,0.0 +gfx950,256,5449,8192,512,ck,0,0,59.6538,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.25,1613.65,0.0 +gfx950,256,5252,8192,512,ck,0,0,59.6608,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,738.46,1557.67,0.0 +gfx950,256,5568,8192,512,ck,0,0,59.6633,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.86,1647.1,0.0 +gfx950,256,5474,8192,512,ck,0,0,59.6738,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.5,1620.19,0.0 +gfx950,256,5625,8192,512,ck,0,0,59.678,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.68,1662.83,0.0 +gfx950,256,5398,8192,512,ck,0,0,59.6865,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.66,1598.33,0.0 +gfx950,256,5473,8192,512,ck,0,0,59.6931,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.11,1619.39,0.0 +gfx950,256,5428,8192,512,ck,0,0,59.6935,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.79,1606.64,0.0 +gfx950,256,5497,8192,512,ck,0,0,59.6998,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.4,1626.0,0.0 +gfx950,256,5611,8192,512,ck,0,0,59.7096,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.29,1657.99,0.0 +gfx950,256,5533,8192,512,ck,0,0,59.7188,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.21,1635.66,0.0 +gfx950,256,1003,7168,4608,ck,0,0,59.73,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1109.3,871.1,0.0 +gfx950,256,5402,8192,512,ck,0,0,59.7555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.34,1597.62,0.0 +gfx950,256,5616,8192,512,ck,0,0,59.7568,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.37,1658.09,0.0 +gfx950,256,5335,8192,512,ck,0,0,59.7575,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.91,1578.62,0.0 +gfx950,256,5408,8192,512,ck,0,0,59.7628,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.09,1599.12,0.0 +gfx950,256,5603,8192,512,ck,0,0,59.7639,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.45,1654.22,0.0 +gfx950,256,5278,8192,512,ck,0,0,59.7652,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.82,1562.3,0.0 +gfx950,256,5629,8192,512,ck,0,0,59.7838,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.84,1661.02,0.0 +gfx950,256,5584,8192,512,ck,0,0,59.7872,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.48,1648.21,0.0 +gfx950,256,5539,8192,512,ck,0,0,59.7915,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.11,1635.37,0.0 +gfx950,256,5602,8192,512,ck,0,0,59.7987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.85,1652.97,0.0 +gfx950,256,5388,8192,512,ck,0,0,59.8072,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.73,1592.28,0.0 +gfx950,256,5389,8192,512,ck,0,0,59.8422,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.42,1591.63,0.0 +gfx950,256,5631,8192,512,ck,0,0,59.8496,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.25,1659.76,0.0 +gfx950,256,5250,8192,512,ck,0,0,59.8543,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,735.79,1552.07,0.0 +gfx950,256,5284,8192,512,ck,0,0,59.8568,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.52,1561.61,0.0 +gfx950,256,5442,8192,512,ck,0,0,59.8577,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.66,1606.18,0.0 +gfx950,256,5610,8192,512,ck,0,0,59.8584,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.19,1653.58,0.0 +gfx950,256,5465,8192,512,ck,0,0,59.8787,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.61,1612.11,0.0 +gfx950,256,5579,8192,512,ck,0,0,59.8844,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.51,1644.12,0.0 +gfx950,256,5511,8192,512,ck,0,0,59.8859,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.96,1624.89,0.0 +gfx950,256,5450,8192,512,ck,0,0,59.8932,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.32,1607.49,0.0 +gfx950,256,5546,8192,512,ck,0,0,59.894,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.76,1634.55,0.0 +gfx950,256,5292,8192,512,ck,0,0,59.8948,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,741.17,1562.87,0.0 +gfx950,256,5281,8192,512,ck,0,0,59.9012,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.56,1559.6,0.0 +gfx950,256,5517,8192,512,ck,0,0,59.9048,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.56,1626.07,0.0 +gfx950,256,5436,8192,512,ck,0,0,59.9307,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.89,1602.53,0.0 +gfx950,256,5396,8192,512,ck,0,0,59.9311,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.28,1591.25,0.0 +gfx950,256,5433,8192,512,ck,0,0,59.9323,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.45,1601.65,0.0 +gfx950,256,5544,8192,512,ck,0,0,59.944,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.83,1632.62,0.0 +gfx950,256,5415,8192,512,ck,0,0,59.946,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.75,1596.21,0.0 +gfx950,256,5418,8192,512,ck,0,0,59.9498,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.13,1596.95,0.0 +gfx950,256,5403,8192,512,ck,0,0,59.9631,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.86,1592.37,0.0 +gfx950,256,5460,8192,512,ck,0,0,59.9904,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.49,1607.7,0.0 +gfx950,256,5532,8192,512,ck,0,0,59.9937,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.51,1627.89,0.0 +gfx950,256,5476,8192,512,ck,0,0,60.0064,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.52,1611.77,0.0 +gfx950,256,5502,8192,512,ck,0,0,60.0175,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.01,1618.8,0.0 +gfx950,256,5451,8192,512,ck,0,0,60.042,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.57,1603.78,0.0 +gfx950,256,5492,8192,512,ck,0,0,60.042,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.3,1615.32,0.0 +gfx950,256,5548,8192,512,ck,0,0,60.054,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.97,1630.75,0.0 +gfx950,256,5391,8192,512,ck,0,0,60.0633,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.92,1586.34,0.0 +gfx950,256,5590,8192,512,ck,0,0,60.1311,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.83,1640.46,0.0 +gfx950,256,5458,8192,512,ck,0,0,60.1359,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.36,1603.25,0.0 +gfx950,256,5491,8192,512,ck,0,0,60.1526,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.75,1612.07,0.0 +gfx950,256,5456,8192,512,ck,0,0,60.1564,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.82,1602.14,0.0 +gfx950,256,5559,8192,512,ck,0,0,60.162,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.11,1630.92,0.0 +gfx950,256,5594,8192,512,ck,0,0,60.1796,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.76,1640.27,0.0 +gfx950,256,5409,8192,512,ck,0,0,60.1842,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.92,1588.2,0.0 +gfx950,256,5496,8192,512,ck,0,0,60.1945,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.91,1612.35,0.0 +gfx950,256,5597,8192,512,ck,0,0,60.3088,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.51,1637.59,0.0 +gfx950,256,5580,8192,512,ck,0,0,60.3154,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.06,1632.65,0.0 +gfx950,256,5500,8192,512,ck,0,0,60.3253,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.81,1609.98,0.0 +gfx950,256,5598,8192,512,ck,0,0,60.3307,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.37,1637.28,0.0 +gfx950,256,5537,8192,512,ck,0,0,60.3368,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.81,1620.03,0.0 +gfx950,256,5434,8192,512,ck,0,0,60.4143,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.52,1589.15,0.0 +gfx950,256,5416,8192,512,ck,0,0,60.4194,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.96,1583.98,0.0 +gfx950,256,5493,8192,512,ck,0,0,60.4204,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.63,1605.48,0.0 +gfx950,256,5441,8192,512,ck,0,0,60.4332,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.25,1590.61,0.0 +gfx950,256,5390,8192,512,ck,0,0,60.4503,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,747.96,1575.9,0.0 +gfx950,256,5459,8192,512,ck,0,0,60.4619,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.39,1594.88,0.0 +gfx950,256,5600,8192,512,ck,0,0,60.5199,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.21,1632.72,0.0 +gfx950,256,5509,8192,512,ck,0,0,60.5548,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.16,1606.39,0.0 +gfx950,256,5479,8192,512,ck,0,0,60.5695,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.82,1597.63,0.0 +gfx950,256,5531,8192,512,ck,0,0,60.5981,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.66,1611.37,0.0 +gfx950,256,5545,8192,512,ck,0,0,60.6052,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.51,1615.09,0.0 +gfx950,256,5414,8192,512,ck,0,0,60.6149,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.25,1578.31,0.0 +gfx950,256,5447,8192,512,ck,0,0,60.7607,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.01,1583.7,0.0 +gfx950,256,5540,8192,512,ck,0,0,60.8412,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.84,1607.43,0.0 +gfx950,256,5488,8192,512,ck,0,0,60.8531,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.52,1592.68,0.0 +gfx950,256,5448,8192,512,ck,0,0,60.8796,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,750.68,1580.89,0.0 +gfx950,256,5503,8192,512,ck,0,0,61.0147,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.58,1592.62,0.0 +gfx950,256,5547,8192,512,ck,0,0,61.0452,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.25,1604.0,0.0 +gfx950,256,5385,8192,512,ck,0,0,61.069,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,739.7,1558.55,0.0 +gfx950,256,5431,8192,512,ck,0,0,61.079,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.9,1571.02,0.0 +gfx950,256,5506,8192,512,ck,0,0,61.0861,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.11,1591.58,0.0 +gfx950,256,432,9216,7168,ck,14,0,61.0943,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,934.23,1262.3,0.0 +gfx950,256,2534,8192,1536,ck,0,0,61.1898,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1042.17,947.74,0.0 +gfx950,256,5350,8192,512,ck,0,0,61.4638,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.17,1538.92,0.0 +gfx950,256,1047,7168,4608,ck,0,0,61.6439,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1122.01,857.58,0.0 +gfx950,256,1087,7168,4608,ck,0,0,62.0055,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1158.08,864.8,0.0 +gfx950,256,5712,8192,512,ck,0,0,62.251,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.72,1617.71,0.0 +gfx950,256,5728,8192,512,ck,0,0,62.2658,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.69,1621.67,0.0 +gfx950,256,5698,8192,512,ck,0,0,62.2758,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.53,1613.27,0.0 +gfx950,256,5649,8192,512,ck,0,0,62.3031,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.59,1599.28,0.0 +gfx950,256,5640,8192,512,ck,0,0,62.3822,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.42,1594.81,0.0 +gfx950,256,5644,8192,512,ck,0,0,62.465,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.95,1593.78,0.0 +gfx950,256,5713,8192,512,ck,0,0,62.5038,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.74,1611.44,0.0 +gfx950,256,5752,8192,512,ck,0,0,62.55,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.4,1620.78,0.0 +gfx950,256,5747,8192,512,ck,0,0,62.5754,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.42,1618.78,0.0 +gfx950,256,5642,8192,512,ck,0,0,62.6224,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.78,1589.23,0.0 +gfx950,256,5676,8192,512,ck,0,0,62.6356,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.17,1598.07,0.0 +gfx950,256,1040,7168,4608,ck,0,0,62.6552,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1096.52,841.62,0.0 +gfx950,256,5635,8192,512,ck,0,0,62.7042,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.85,1585.27,0.0 +gfx950,256,5678,8192,512,ck,0,0,62.7257,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.35,1596.31,0.0 +gfx950,256,5708,8192,512,ck,0,0,62.7716,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.8,1603.22,0.0 +gfx950,256,5680,8192,512,ck,0,0,62.8446,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.18,1593.83,0.0 +gfx950,256,5703,8192,512,ck,0,0,62.8632,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.02,1599.54,0.0 +gfx950,256,5668,8192,512,ck,0,0,62.8654,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.32,1590.08,0.0 +gfx950,256,1112,7168,4608,ck,0,0,62.866,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1168.5,860.49,0.0 +gfx950,256,5803,8192,512,ck,0,0,62.8684,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.3,1626.28,0.0 +gfx950,256,5709,8192,512,ck,0,0,62.8916,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.48,1600.43,0.0 +gfx950,256,5683,8192,512,ck,0,0,62.9024,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.88,1593.17,0.0 +gfx950,256,5779,8192,512,ck,0,0,62.9077,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.62,1618.82,0.0 +gfx950,256,5756,8192,512,ck,0,0,62.9176,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.43,1612.39,0.0 +gfx950,256,5832,8192,512,ck,0,0,62.9213,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.52,1632.7,0.0 +gfx950,256,5790,8192,512,ck,0,0,62.9526,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.53,1620.62,0.0 +gfx950,256,5834,8192,512,ck,0,0,63.0098,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.69,1630.95,0.0 +gfx950,256,5827,8192,512,ck,0,0,63.0409,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.38,1628.27,0.0 +gfx950,256,5769,8192,512,ck,0,0,63.056,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.47,1612.33,0.0 +gfx950,256,5742,8192,512,ck,0,0,63.0672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.75,1604.81,0.0 +gfx950,256,5881,8192,512,ck,0,0,63.0816,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.06,1641.68,0.0 +gfx950,256,5750,8192,512,ck,0,0,63.0853,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.59,1606.5,0.0 +gfx950,256,5671,8192,512,ck,0,0,63.0951,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.97,1585.09,0.0 +gfx950,256,5738,8192,512,ck,0,0,63.1256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.51,1602.26,0.0 +gfx950,256,5787,8192,512,ck,0,0,63.1258,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.02,1615.37,0.0 +gfx950,256,5638,8192,512,ck,0,0,63.1287,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.18,1575.42,0.0 +gfx950,256,5687,8192,512,ck,0,0,63.1304,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.67,1588.49,0.0 +gfx950,256,5690,8192,512,ck,0,0,63.1378,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.98,1589.1,0.0 +gfx950,256,5686,8192,512,ck,0,0,63.1384,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.45,1588.02,0.0 +gfx950,256,5770,8192,512,ck,0,0,63.1418,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.56,1610.41,0.0 +gfx950,256,5843,8192,512,ck,0,0,63.1517,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.14,1629.69,0.0 +gfx950,256,5677,8192,512,ck,0,0,63.1907,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.63,1584.3,0.0 +gfx950,256,5812,8192,512,ck,0,0,63.1933,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.52,1620.33,0.0 +gfx950,256,5845,8192,512,ck,0,0,63.1947,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.88,1629.11,0.0 +gfx950,256,5705,8192,512,ck,0,0,63.2086,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.13,1591.33,0.0 +gfx950,256,3332,6144,1536,ck,0,0,63.2732,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,993.93,877.13,0.0 +gfx950,256,5704,8192,512,ck,0,0,63.291,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.01,1589.0,0.0 +gfx950,256,5735,8192,512,ck,0,0,63.2958,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.06,1597.15,0.0 +gfx950,256,1132,7168,4608,ck,0,0,63.3004,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1181.36,860.58,0.0 +gfx950,256,5681,8192,512,ck,0,0,63.3448,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.32,1581.51,0.0 +gfx950,256,5672,8192,512,ck,0,0,63.3448,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.13,1579.11,0.0 +gfx950,256,5888,8192,512,ck,0,0,63.3852,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.24,1635.68,0.0 +gfx950,256,5777,8192,512,ck,0,0,63.4344,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.95,1604.85,0.0 +gfx950,256,5707,8192,512,ck,0,0,63.4736,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.23,1585.22,0.0 +gfx950,256,5800,8192,512,ck,0,0,63.4905,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.32,1609.55,0.0 +gfx950,256,5730,8192,512,ck,0,0,63.494,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.03,1590.83,0.0 +gfx950,256,5667,8192,512,ck,0,0,63.5,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.63,1573.92,0.0 +gfx950,256,5846,8192,512,ck,0,0,63.5168,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.08,1621.12,0.0 +gfx950,256,5755,8192,512,ck,0,0,63.5362,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.83,1596.43,0.0 +gfx950,256,5799,8192,512,ck,0,0,63.5497,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.47,1607.78,0.0 +gfx950,256,5808,8192,512,ck,0,0,63.5797,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.3,1609.42,0.0 +gfx950,256,5675,8192,512,ck,0,0,63.5823,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,748.72,1574.01,0.0 +gfx950,256,5639,8192,512,ck,0,0,63.5987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,743.78,1564.04,0.0 +gfx950,256,5810,8192,512,ck,0,0,63.6121,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.17,1609.13,0.0 +gfx950,256,5868,8192,512,ck,0,0,63.6158,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.78,1624.44,0.0 +gfx950,256,5759,8192,512,ck,0,0,63.6164,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.4,1595.47,0.0 +gfx950,256,5833,8192,512,ck,0,0,63.6457,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.8,1614.39,0.0 +gfx950,256,5837,8192,512,ck,0,0,63.6657,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.08,1614.94,0.0 +gfx950,256,5847,8192,512,ck,0,0,63.6853,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.17,1617.1,0.0 +gfx950,256,5864,8192,512,ck,0,0,63.714,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.06,1620.88,0.0 +gfx950,256,5831,8192,512,ck,0,0,63.7637,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.11,1610.87,0.0 +gfx950,256,5882,8192,512,ck,0,0,63.7745,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.69,1624.11,0.0 +gfx950,256,5829,8192,512,ck,0,0,63.7961,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.46,1609.52,0.0 +gfx950,256,5791,8192,512,ck,0,0,63.8196,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.18,1598.87,0.0 +gfx950,256,5849,8192,512,ck,0,0,63.9122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.69,1611.88,0.0 +gfx950,256,5967,8192,512,ck,0,0,63.9285,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.98,1642.66,0.0 +gfx950,256,5754,8192,512,ck,0,0,63.9713,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.53,1585.3,0.0 +gfx950,256,5956,8192,512,ck,0,0,64.0442,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.13,1636.79,0.0 +gfx950,256,5806,8192,512,ck,0,0,64.0561,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.34,1596.92,0.0 +gfx950,256,5835,8192,512,ck,0,0,64.0725,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.94,1604.16,0.0 +gfx950,256,5495,8192,512,ck,0,0,64.0836,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,719.3,1514.24,0.0 +gfx950,256,5851,8192,512,ck,0,0,64.0848,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.89,1608.07,0.0 +gfx950,256,5871,8192,512,ck,0,0,64.0879,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.47,1613.26,0.0 +gfx950,256,1144,7168,4608,ck,0,0,64.1186,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1178.64,853.14,0.0 +gfx950,256,5870,8192,512,ck,0,0,64.1277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.86,1612.0,0.0 +gfx950,256,6000,8192,512,ck,0,0,64.1456,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.65,1645.79,0.0 +gfx950,256,5797,8192,512,ck,0,0,64.1949,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.52,1591.1,0.0 +gfx950,256,5634,8192,512,ck,0,0,64.2008,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.15,1548.05,0.0 +gfx950,256,5899,8192,512,ck,0,0,64.2342,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.37,1616.95,0.0 +gfx950,256,5844,8192,512,ck,0,0,64.2397,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.13,1602.35,0.0 +gfx950,256,5981,8192,512,ck,0,0,64.2432,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.97,1638.29,0.0 +gfx950,256,2567,8192,1536,ck,0,0,64.2636,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1005.25,911.61,0.0 +gfx950,256,5953,8192,512,ck,0,0,64.2712,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.98,1630.22,0.0 +gfx950,256,5896,8192,512,ck,0,0,64.3075,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.11,1614.32,0.0 +gfx950,256,5782,8192,512,ck,0,0,64.3176,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.12,1584.12,0.0 +gfx950,256,5902,8192,512,ck,0,0,64.3276,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.65,1615.4,0.0 +gfx950,256,2594,8192,1536,ck,0,0,64.3373,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1014.65,918.09,0.0 +gfx950,256,5809,8192,512,ck,0,0,64.3868,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.82,1589.51,0.0 +gfx950,256,5937,8192,512,ck,0,0,64.4096,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.23,1622.52,0.0 +gfx950,256,5876,8192,512,ck,0,0,64.4437,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.88,1605.67,0.0 +gfx950,256,5939,8192,512,ck,0,0,64.4585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.9,1621.81,0.0 +gfx950,256,5891,8192,512,ck,0,0,64.4624,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.61,1609.13,0.0 +gfx950,256,5895,8192,512,ck,0,0,64.4725,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.01,1609.93,0.0 +gfx950,256,5965,8192,512,ck,0,0,64.5776,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.85,1625.62,0.0 +gfx950,256,5968,8192,512,ck,0,0,64.5941,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.04,1625.99,0.0 +gfx950,256,5930,8192,512,ck,0,0,64.6004,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.03,1615.9,0.0 +gfx950,256,6009,8192,512,ck,0,0,64.6188,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.07,1636.09,0.0 +gfx950,256,5903,8192,512,ck,0,0,64.6246,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.24,1608.23,0.0 +gfx950,256,5931,8192,512,ck,0,0,64.6356,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.74,1615.28,0.0 +gfx950,256,5945,8192,512,ck,0,0,64.6445,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.45,1618.72,0.0 +gfx950,256,5972,8192,512,ck,0,0,64.6481,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.91,1625.68,0.0 +gfx950,256,5962,8192,512,ck,0,0,64.652,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.57,1622.97,0.0 +gfx950,256,5938,8192,512,ck,0,0,64.6972,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.92,1615.57,0.0 +gfx950,256,5898,8192,512,ck,0,0,64.7288,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.36,1604.34,0.0 +gfx950,256,5924,8192,512,ck,0,0,64.7309,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.7,1611.07,0.0 +gfx950,256,5978,8192,512,ck,0,0,64.776,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.16,1624.04,0.0 +gfx950,256,5762,8192,512,ck,0,0,64.8018,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,745.89,1567.07,0.0 +gfx950,256,5894,8192,512,ck,0,0,64.8109,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.87,1601.26,0.0 +gfx950,256,6011,8192,512,ck,0,0,64.8145,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.97,1631.67,0.0 +gfx950,256,5996,8192,512,ck,0,0,64.8329,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.81,1627.3,0.0 +gfx950,256,5904,8192,512,ck,0,0,64.8565,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.63,1602.74,0.0 +gfx950,256,5885,8192,512,ck,0,0,64.8715,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.0,1597.42,0.0 +gfx950,256,5911,8192,512,ck,0,0,64.8957,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.07,1603.6,0.0 +gfx950,256,5916,8192,512,ck,0,0,64.9024,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.64,1604.73,0.0 +gfx950,256,5983,8192,512,ck,0,0,64.9344,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.92,1621.38,0.0 +gfx950,256,5890,8192,512,ck,0,0,64.9488,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.74,1596.82,0.0 +gfx950,256,5949,8192,512,ck,0,0,65.0277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.42,1610.22,0.0 +gfx950,256,5955,8192,512,ck,0,0,65.0413,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.04,1611.44,0.0 +gfx950,256,5933,8192,512,ck,0,0,65.0777,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.77,1604.82,0.0 +gfx950,256,5958,8192,512,ck,0,0,65.0837,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.92,1611.17,0.0 +gfx950,256,5980,8192,512,ck,0,0,65.0933,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.65,1616.64,0.0 +gfx950,256,5950,8192,512,ck,0,0,65.1172,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.5,1608.26,0.0 +gfx950,256,5973,8192,512,ck,0,0,65.1249,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.37,1614.04,0.0 +gfx950,256,5926,8192,512,ck,0,0,65.1569,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.94,1601.06,0.0 +gfx950,256,5992,8192,512,ck,0,0,65.2521,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.31,1615.81,0.0 +gfx950,256,3451,6144,1536,ck,0,0,65.2973,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,997.52,875.13,0.0 +gfx950,256,5982,8192,512,ck,0,0,65.3037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.42,1611.95,0.0 +gfx950,256,2658,8192,1536,ck,0,0,65.3117,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1024.18,921.95,0.0 +gfx950,256,5988,8192,512,ck,0,0,65.316,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.05,1613.2,0.0 +gfx950,256,3503,6144,1536,ck,0,0,65.3444,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1011.82,885.5,0.0 +gfx950,256,5889,8192,512,ck,0,0,65.3453,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.99,1586.88,0.0 +gfx950,256,5925,8192,512,ck,0,0,65.4053,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.92,1594.72,0.0 +gfx950,256,6022,8192,512,ck,0,0,65.4217,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.16,1619.37,0.0 +gfx950,256,6008,8192,512,ck,0,0,65.4781,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.7,1614.36,0.0 +gfx950,256,2597,8192,1536,ck,0,0,65.5409,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,997.17,902.05,0.0 +gfx950,256,6021,8192,512,ck,0,0,65.6357,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.52,1613.83,0.0 +gfx950,256,6127,8192,512,ck,0,0,65.6357,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.06,1641.12,0.0 +gfx950,256,5942,8192,512,ck,0,0,65.6376,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.4,1593.45,0.0 +gfx950,256,5974,8192,512,ck,0,0,65.7848,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.78,1598.1,0.0 +gfx950,256,6080,8192,512,ck,0,0,65.7896,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.24,1625.21,0.0 +gfx950,256,6052,8192,512,ck,0,0,65.8329,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.16,1616.96,0.0 +gfx950,256,2570,8192,1536,ck,0,0,65.8729,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,981.83,890.16,0.0 +gfx950,256,2713,8192,1536,ck,0,0,65.9292,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1035.58,928.27,0.0 +gfx950,256,6030,8192,512,ck,0,0,65.9309,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.22,1608.91,0.0 +gfx950,256,6064,8192,512,ck,0,0,66.0413,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.25,1614.92,0.0 +gfx950,256,6073,8192,512,ck,0,0,66.0562,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.22,1616.86,0.0 +gfx950,256,2794,8192,1536,ck,0,0,66.0597,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1064.39,948.41,0.0 +gfx950,256,6025,8192,512,ck,0,0,66.0625,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.05,1604.43,0.0 +gfx950,256,6104,8192,512,ck,0,0,66.1433,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.14,1622.65,0.0 +gfx950,256,6141,8192,512,ck,0,0,66.1845,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.35,1631.09,0.0 +gfx950,256,6084,8192,512,ck,0,0,66.1953,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.0,1616.27,0.0 +gfx950,256,6092,8192,512,ck,0,0,66.2418,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.47,1617.18,0.0 +gfx950,256,2729,8192,1536,ck,0,0,66.2449,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1036.72,928.17,0.0 +gfx950,256,6099,8192,512,ck,0,0,66.2667,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.06,1618.35,0.0 +gfx950,256,6095,8192,512,ck,0,0,66.2957,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.22,1616.63,0.0 +gfx950,256,2636,8192,1536,ck,0,0,66.298,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1000.59,902.29,0.0 +gfx950,256,6050,8192,512,ck,0,0,66.3132,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.32,1604.73,0.0 +gfx950,256,3587,6144,1536,ck,0,0,66.32,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1020.84,889.99,0.0 +gfx950,256,6097,8192,512,ck,0,0,66.327,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.11,1616.37,0.0 +gfx950,256,6075,8192,512,ck,0,0,66.3329,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.26,1610.63,0.0 +gfx950,256,6067,8192,512,ck,0,0,66.3811,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.69,1607.42,0.0 +gfx950,256,6035,8192,512,ck,0,0,66.3968,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.47,1598.9,0.0 +gfx950,256,6077,8192,512,ck,0,0,66.3993,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.74,1609.52,0.0 +gfx950,256,6041,8192,512,ck,0,0,66.4017,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.17,1600.31,0.0 +gfx950,256,6126,8192,512,ck,0,0,66.4097,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.81,1621.74,0.0 +gfx950,256,2747,8192,1536,ck,0,0,66.4448,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1040.42,930.23,0.0 +gfx950,256,3620,6144,1536,ck,0,0,66.4912,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1027.58,894.56,0.0 +gfx950,256,6049,8192,512,ck,0,0,66.5141,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.89,1599.63,0.0 +gfx950,256,2593,8192,1536,ck,0,0,66.5433,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,980.64,887.38,0.0 +gfx950,256,6069,8192,512,ck,0,0,66.5449,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.05,1603.97,0.0 +gfx950,256,6034,8192,512,ck,0,0,66.5457,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.63,1595.07,0.0 +gfx950,256,6121,8192,512,ck,0,0,66.5937,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.04,1615.99,0.0 +gfx950,256,3662,6144,1536,ck,0,0,66.6009,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1037.79,901.8,0.0 +gfx950,256,6139,8192,512,ck,0,0,66.6293,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.9,1619.69,0.0 +gfx950,256,3617,6144,1536,ck,0,0,66.6313,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1024.57,892.05,0.0 +gfx950,256,6105,8192,512,ck,0,0,66.7321,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.43,1608.59,0.0 +gfx950,256,6029,8192,512,ck,0,0,66.7325,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.88,1589.33,0.0 +gfx950,256,6133,8192,512,ck,0,0,66.7489,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.76,1615.27,0.0 +gfx950,256,2740,8192,1536,ck,0,0,66.8121,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1032.06,923.24,0.0 +gfx950,256,6134,8192,512,ck,0,0,66.8465,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.76,1613.16,0.0 +gfx950,256,3840,6144,1536,ck,0,0,66.8818,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1083.67,934.8,0.0 +gfx950,256,3655,6144,1536,ck,0,0,66.8915,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1031.31,896.44,0.0 +gfx950,256,2748,8192,1536,ck,0,0,66.9061,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1033.62,924.09,0.0 +gfx950,256,6106,8192,512,ck,0,0,66.9353,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.23,1603.96,0.0 +gfx950,256,6070,8192,512,ck,0,0,66.9365,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.7,1594.84,0.0 +gfx950,256,2841,8192,1536,ck,0,0,67.0773,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1065.88,946.57,0.0 +gfx950,256,6368,8192,512,ck,0,0,67.0877,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.25,1666.3,0.0 +gfx950,256,6215,8192,512,ck,0,0,67.1264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.67,1626.83,0.0 +gfx950,256,6062,8192,512,ck,0,0,67.1363,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.44,1588.08,0.0 +gfx950,256,6171,8192,512,ck,0,0,67.1641,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.74,1614.84,0.0 +gfx950,256,2734,8192,1536,ck,0,0,67.1978,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1023.89,916.34,0.0 +gfx950,256,6020,8192,512,ck,0,0,67.2259,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,751.19,1575.41,0.0 +gfx950,256,2757,8192,1536,ck,0,0,67.2386,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1031.88,921.92,0.0 +gfx950,256,6227,8192,512,ck,0,0,67.2601,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.62,1626.61,0.0 +gfx950,256,6182,8192,512,ck,0,0,67.2921,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.65,1614.53,0.0 +gfx950,256,6169,8192,512,ck,0,0,67.3342,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.54,1610.26,0.0 +gfx950,256,3632,6144,1536,ck,0,0,67.3461,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1017.9,885.66,0.0 +gfx950,256,6166,8192,512,ck,0,0,67.3637,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.83,1608.8,0.0 +gfx950,256,6278,8192,512,ck,0,0,67.3801,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.59,1636.5,0.0 +gfx950,256,3824,6144,1536,ck,0,0,67.4936,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1069.37,923.05,0.0 +gfx950,256,6324,8192,512,ck,0,0,67.5544,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.29,1643.78,0.0 +gfx950,256,6177,8192,512,ck,0,0,67.5629,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.94,1606.81,0.0 +gfx950,256,2843,8192,1536,ck,0,0,67.5916,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1058.51,939.9,0.0 +gfx950,256,6204,8192,512,ck,0,0,67.6065,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.79,1612.52,0.0 +gfx950,256,6238,8192,512,ck,0,0,67.6477,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.54,1620.03,0.0 +gfx950,256,6186,8192,512,ck,0,0,67.668,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.86,1606.56,0.0 +gfx950,256,6236,8192,512,ck,0,0,67.7049,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.64,1618.17,0.0 +gfx950,256,6168,8192,512,ck,0,0,67.7089,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.17,1601.1,0.0 +gfx950,256,6179,8192,512,ck,0,0,67.7205,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.4,1603.57,0.0 +gfx950,256,6288,8192,512,ck,0,0,67.7253,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.85,1630.65,0.0 +gfx950,256,6239,8192,512,ck,0,0,67.7317,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.7,1618.27,0.0 +gfx950,256,3004,8192,1536,ck,0,0,67.7521,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1115.81,980.26,0.0 +gfx950,256,6283,8192,512,ck,0,0,67.7617,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.81,1628.53,0.0 +gfx950,256,6173,8192,512,ck,0,0,67.7797,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.99,1600.68,0.0 +gfx950,256,2918,8192,1536,ck,0,0,67.8042,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1083.03,956.78,0.0 +gfx950,256,2861,8192,1536,ck,0,0,67.8478,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1061.19,941.11,0.0 +gfx950,256,6286,8192,512,ck,0,0,67.8617,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.03,1626.88,0.0 +gfx950,256,6200,8192,512,ck,0,0,67.8633,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.38,1605.43,0.0 +gfx950,256,6384,8192,512,ck,0,0,67.8753,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.99,1650.94,0.0 +gfx950,256,6322,8192,512,ck,0,0,67.8981,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.06,1634.96,0.0 +gfx950,256,6191,8192,512,ck,0,0,67.9155,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.68,1601.95,0.0 +gfx950,256,6366,8192,512,ck,0,0,67.9229,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.21,1645.31,0.0 +gfx950,256,6026,8192,512,ck,0,0,67.9281,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.17,1560.61,0.0 +gfx950,256,6365,8192,512,ck,0,0,67.931,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.0,1644.87,0.0 +gfx950,256,6294,8192,512,ck,0,0,67.9365,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.17,1627.07,0.0 +gfx950,256,6369,8192,512,ck,0,0,67.9413,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.37,1645.61,0.0 +gfx950,256,6207,8192,512,ck,0,0,67.9581,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.18,1604.93,0.0 +gfx950,256,6282,8192,512,ck,0,0,67.9645,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.36,1623.42,0.0 +gfx950,256,6202,8192,512,ck,0,0,67.9797,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.32,1603.17,0.0 +gfx950,256,6364,8192,512,ck,0,0,67.9805,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.3,1643.42,0.0 +gfx950,256,6362,8192,512,ck,0,0,67.9873,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.97,1642.76,0.0 +gfx950,256,6221,8192,512,ck,0,0,67.9962,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.48,1607.51,0.0 +gfx950,256,6359,8192,512,ck,0,0,68.0053,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.4,1641.58,0.0 +gfx950,256,6349,8192,512,ck,0,0,68.0169,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.03,1638.81,0.0 +gfx950,256,6328,8192,512,ck,0,0,68.0181,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.43,1633.57,0.0 +gfx950,256,6337,8192,512,ck,0,0,68.026,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.45,1635.61,0.0 +gfx950,256,6287,8192,512,ck,0,0,68.0554,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.94,1622.49,0.0 +gfx950,256,6180,8192,512,ck,0,0,68.0733,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.56,1595.51,0.0 +gfx950,256,6162,8192,512,ck,0,0,68.1017,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.02,1590.38,0.0 +gfx950,256,6198,8192,512,ck,0,0,68.1247,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.2,1598.77,0.0 +gfx950,256,6230,8192,512,ck,0,0,68.1445,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.91,1606.24,0.0 +gfx950,256,6319,8192,512,ck,0,0,68.1461,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.85,1628.27,0.0 +gfx950,256,6381,8192,512,ck,0,0,68.1469,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.48,1643.62,0.0 +gfx950,256,6277,8192,512,ck,0,0,68.1581,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.55,1617.57,0.0 +gfx950,256,6297,8192,512,ck,0,0,68.1669,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.91,1622.32,0.0 +gfx950,256,6172,8192,512,ck,0,0,68.1985,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.17,1590.6,0.0 +gfx950,256,6387,8192,512,ck,0,0,68.2113,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.47,1643.56,0.0 +gfx950,256,3749,6144,1536,ck,0,0,68.2486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1036.8,897.65,0.0 +gfx950,256,2889,8192,1536,ck,0,0,68.2546,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1065.19,942.85,0.0 +gfx950,256,6159,8192,512,ck,0,0,68.2656,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.83,1585.82,0.0 +gfx950,256,6346,8192,512,ck,0,0,68.2785,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.66,1631.79,0.0 +gfx950,256,6395,8192,512,ck,0,0,68.3037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.39,1643.31,0.0 +gfx950,256,6326,8192,512,ck,0,0,68.3249,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.68,1625.74,0.0 +gfx950,256,6201,8192,512,ck,0,0,68.3328,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.24,1594.64,0.0 +gfx950,256,6195,8192,512,ck,0,0,68.3465,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.35,1592.84,0.0 +gfx950,256,6228,8192,512,ck,0,0,68.3521,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.34,1600.87,0.0 +gfx950,256,6407,8192,512,ck,0,0,68.3617,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.2,1644.88,0.0 +gfx950,256,6259,8192,512,ck,0,0,68.3701,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.94,1608.11,0.0 +gfx950,256,6371,8192,512,ck,0,0,68.3733,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.65,1635.71,0.0 +gfx950,256,6244,8192,512,ck,0,0,68.3748,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.05,1604.29,0.0 +gfx950,256,6424,8192,512,ck,0,0,68.3884,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.98,1648.44,0.0 +gfx950,256,6148,8192,512,ck,0,0,68.4045,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.94,1579.88,0.0 +gfx950,256,6263,8192,512,ck,0,0,68.4157,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.92,1608.02,0.0 +gfx950,256,3000,8192,1536,ck,0,0,68.4185,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1103.47,969.66,0.0 +gfx950,256,6332,8192,512,ck,0,0,68.4289,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.23,1624.75,0.0 +gfx950,256,6458,8192,512,ck,0,0,68.4489,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.45,1655.38,0.0 +gfx950,256,6378,8192,512,ck,0,0,68.4573,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.55,1635.43,0.0 +gfx950,256,6376,8192,512,ck,0,0,68.4649,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.21,1634.75,0.0 +gfx950,256,6161,8192,512,ck,0,0,68.4809,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,754.7,1581.33,0.0 +gfx950,256,6192,8192,512,ck,0,0,68.4866,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.43,1588.84,0.0 +gfx950,256,6372,8192,512,ck,0,0,68.4937,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.4,1633.08,0.0 +gfx950,256,6157,8192,512,ck,0,0,68.5157,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.82,1579.54,0.0 +gfx950,256,6289,8192,512,ck,0,0,68.5569,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.52,1611.12,0.0 +gfx950,256,6445,8192,512,ck,0,0,68.5857,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.28,1648.87,0.0 +gfx950,256,6537,8192,512,ck,0,0,68.5909,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.47,1671.41,0.0 +gfx950,256,6318,8192,512,ck,0,0,68.6037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.54,1617.16,0.0 +gfx950,256,6270,8192,512,ck,0,0,68.6119,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.58,1605.15,0.0 +gfx950,256,6323,8192,512,ck,0,0,68.6209,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.96,1617.99,0.0 +gfx950,256,6281,8192,512,ck,0,0,68.6265,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.76,1607.51,0.0 +gfx950,256,6241,8192,512,ck,0,0,68.6557,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.55,1596.99,0.0 +gfx950,256,2964,8192,1536,ck,0,0,68.6594,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1086.4,956.87,0.0 +gfx950,256,6390,8192,512,ck,0,0,68.6603,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.7,1633.55,0.0 +gfx950,256,6267,8192,512,ck,0,0,68.6685,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.58,1603.09,0.0 +gfx950,256,6226,8192,512,ck,0,0,68.671,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.55,1592.94,0.0 +gfx950,256,6284,8192,512,ck,0,0,68.6841,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.48,1606.9,0.0 +gfx950,256,6393,8192,512,ck,0,0,68.6849,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.79,1633.7,0.0 +gfx950,256,6491,8192,512,ck,0,0,68.6861,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.74,1657.78,0.0 +gfx950,256,6455,8192,512,ck,0,0,68.6918,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.28,1648.78,0.0 +gfx950,256,6345,8192,512,ck,0,0,68.6933,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.83,1621.69,0.0 +gfx950,256,6467,8192,512,ck,0,0,68.6949,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.71,1651.66,0.0 +gfx950,256,6382,8192,512,ck,0,0,68.7261,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.98,1630.02,0.0 +gfx950,256,6473,8192,512,ck,0,0,68.7285,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.06,1652.33,0.0 +gfx950,256,6335,8192,512,ck,0,0,68.7581,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.88,1617.71,0.0 +gfx950,256,6243,8192,512,ck,0,0,68.7884,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.32,1594.4,0.0 +gfx950,256,6280,8192,512,ck,0,0,68.8093,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.6,1603.0,0.0 +gfx950,256,6315,8192,512,ck,0,0,68.8125,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.83,1611.52,0.0 +gfx950,256,6483,8192,512,ck,0,0,68.8137,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.3,1652.74,0.0 +gfx950,256,6250,8192,512,ck,0,0,68.8221,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.8,1595.33,0.0 +gfx950,256,6624,8192,512,ck,0,0,68.8585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.96,1686.26,0.0 +gfx950,256,6438,8192,512,ck,0,0,68.8644,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.23,1640.48,0.0 +gfx950,256,6422,8192,512,ck,0,0,68.8773,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.14,1636.25,0.0 +gfx950,256,6582,8192,512,ck,0,0,68.8781,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.62,1675.48,0.0 +gfx950,256,6247,8192,512,ck,0,0,68.9189,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.37,1592.36,0.0 +gfx950,256,6495,8192,512,ck,0,0,68.9377,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.34,1652.71,0.0 +gfx950,256,6374,8192,512,ck,0,0,68.9473,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.51,1622.83,0.0 +gfx950,256,2876,8192,1536,ck,0,0,68.955,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1049.63,929.89,0.0 +gfx950,256,6550,8192,512,ck,0,0,68.9573,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.8,1665.71,0.0 +gfx950,256,6527,8192,512,ck,0,0,68.9709,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.85,1659.75,0.0 +gfx950,256,6427,8192,512,ck,0,0,68.987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.5,1634.87,0.0 +gfx950,256,6380,8192,512,ck,0,0,68.9889,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.77,1623.32,0.0 +gfx950,256,6577,8192,512,ck,0,0,68.9965,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.63,1671.38,0.0 +gfx950,256,6525,8192,512,ck,0,0,69.0385,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.83,1657.64,0.0 +gfx950,256,6433,8192,512,ck,0,0,69.0525,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.49,1634.79,0.0 +gfx950,256,6518,8192,512,ck,0,0,69.0613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.72,1655.38,0.0 +gfx950,256,6593,8192,512,ck,0,0,69.0701,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.72,1673.51,0.0 +gfx950,256,6451,8192,512,ck,0,0,69.0749,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.42,1638.66,0.0 +gfx950,256,6490,8192,512,ck,0,0,69.0757,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.15,1648.18,0.0 +gfx950,256,6539,8192,512,ck,0,0,69.0781,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.07,1660.11,0.0 +gfx950,256,6265,8192,512,ck,0,0,69.0833,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.74,1592.97,0.0 +gfx950,256,6296,8192,512,ck,0,0,69.0945,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.38,1600.29,0.0 +gfx950,256,6444,8192,512,ck,0,0,69.0993,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.3,1636.37,0.0 +gfx950,256,6383,8192,512,ck,0,0,69.1205,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.65,1620.96,0.0 +gfx950,256,6521,8192,512,ck,0,0,69.1465,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.1,1654.07,0.0 +gfx950,256,6421,8192,512,ck,0,0,69.1489,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.95,1629.58,0.0 +gfx950,256,6412,8192,512,ck,0,0,69.1517,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.82,1627.31,0.0 +gfx950,256,6587,8192,512,ck,0,0,69.1801,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.72,1669.39,0.0 +gfx950,256,6430,8192,512,ck,0,0,69.1837,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.65,1630.96,0.0 +gfx950,256,6492,8192,512,ck,0,0,69.1845,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.15,1646.08,0.0 +gfx950,256,6275,8192,512,ck,0,0,69.1965,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.71,1592.81,0.0 +gfx950,256,6329,8192,512,ck,0,0,69.2053,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.16,1605.79,0.0 +gfx950,256,6377,8192,512,ck,0,0,69.2053,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.98,1617.51,0.0 +gfx950,256,6146,8192,512,ck,0,0,69.2401,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,744.6,1560.33,0.0 +gfx950,256,3831,6144,1536,ck,0,0,69.2555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1044.07,900.97,0.0 +gfx950,256,6501,8192,512,ck,0,0,69.2917,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.03,1645.73,0.0 +gfx950,256,6417,8192,512,ck,0,0,69.2934,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.84,1625.2,0.0 +gfx950,256,6456,8192,512,ck,0,0,69.3337,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.1,1633.76,0.0 +gfx950,256,6627,8192,512,ck,0,0,69.3513,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.59,1675.01,0.0 +gfx950,256,6225,8192,512,ck,0,0,69.4037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.4,1575.88,0.0 +gfx950,256,6436,8192,512,ck,0,0,69.4173,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.75,1626.93,0.0 +gfx950,256,6644,8192,512,ck,0,0,69.4177,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.88,1677.55,0.0 +gfx950,256,3894,6144,1536,ck,0,0,69.4186,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1058.75,911.4,0.0 +gfx950,256,4004,6144,1536,ck,0,0,69.4537,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1088.11,932.83,0.0 +gfx950,256,6415,8192,512,ck,0,0,69.4661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.66,1620.68,0.0 +gfx950,256,3019,8192,1536,ck,0,0,69.4671,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1093.69,959.93,0.0 +gfx950,256,6556,8192,512,ck,0,0,69.4761,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.58,1654.73,0.0 +gfx950,256,6443,8192,512,ck,0,0,69.4849,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.84,1627.05,0.0 +gfx950,256,4001,6144,1536,ck,0,0,69.4884,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1086.75,931.77,0.0 +gfx950,256,6469,8192,512,ck,0,0,69.5197,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.58,1632.55,0.0 +gfx950,256,6472,8192,512,ck,0,0,69.5201,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.94,1633.27,0.0 +gfx950,256,6524,8192,512,ck,0,0,69.5221,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.19,1645.86,0.0 +gfx950,256,6553,8192,512,ck,0,0,69.535,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.55,1652.6,0.0 +gfx950,256,6399,8192,512,ck,0,0,69.5389,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.92,1615.09,0.0 +gfx950,256,4051,6144,1536,ck,0,0,69.5461,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1099.42,940.93,0.0 +gfx950,256,2955,8192,1536,ck,0,0,69.551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1069.22,942.28,0.0 +gfx950,256,6560,8192,512,ck,0,0,69.5779,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.9,1653.28,0.0 +gfx950,256,6622,8192,512,ck,0,0,69.6009,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.11,1667.79,0.0 +gfx950,256,6513,8192,512,ck,0,0,69.6641,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.26,1639.84,0.0 +gfx950,256,3005,8192,1536,ck,0,0,69.669,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1085.47,953.54,0.0 +gfx950,256,6638,8192,512,ck,0,0,69.6781,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.15,1669.82,0.0 +gfx950,256,2946,8192,1536,ck,0,0,69.6834,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1063.93,938.18,0.0 +gfx950,256,6590,8192,512,ck,0,0,69.7361,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.72,1656.8,0.0 +gfx950,256,6460,8192,512,ck,0,0,69.7365,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.07,1625.3,0.0 +gfx950,256,2947,8192,1536,ck,0,0,69.805,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1062.44,936.8,0.0 +gfx950,256,6548,8192,512,ck,0,0,69.8289,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.62,1644.44,0.0 +gfx950,256,6566,8192,512,ck,0,0,69.8317,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.75,1648.73,0.0 +gfx950,256,6559,8192,512,ck,0,0,69.8514,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.68,1646.57,0.0 +gfx950,256,6482,8192,512,ck,0,0,69.8553,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.39,1627.85,0.0 +gfx950,256,6615,8192,512,ck,0,0,69.8659,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.25,1659.77,0.0 +gfx950,256,6581,8192,512,ck,0,0,69.8729,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.08,1651.38,0.0 +gfx950,256,3925,6144,1536,ck,0,0,69.8938,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1059.92,911.33,0.0 +gfx950,256,6419,8192,512,ck,0,0,69.9315,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.99,1610.86,0.0 +gfx950,256,6535,8192,512,ck,0,0,69.9553,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.64,1638.33,0.0 +gfx950,256,4002,6144,1536,ck,0,0,70.0033,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1079.02,925.11,0.0 +gfx950,256,6616,8192,512,ck,0,0,70.005,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.79,1656.71,0.0 +gfx950,256,2952,8192,1536,ck,0,0,70.007,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1061.17,935.37,0.0 +gfx950,256,4012,6144,1536,ck,0,0,70.0101,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1081.61,927.0,0.0 +gfx950,256,6418,8192,512,ck,0,0,70.0248,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.84,1608.47,0.0 +gfx950,256,6558,8192,512,ck,0,0,70.0261,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.6,1642.22,0.0 +gfx950,256,6564,8192,512,ck,0,0,70.0665,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.87,1642.72,0.0 +gfx950,256,6573,8192,512,ck,0,0,70.0729,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.87,1644.74,0.0 +gfx950,256,6562,8192,512,ck,0,0,70.1441,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.76,1640.42,0.0 +gfx950,256,6477,8192,512,ck,0,0,70.1717,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.29,1619.31,0.0 +gfx950,256,6498,8192,512,ck,0,0,70.1765,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.74,1624.25,0.0 +gfx950,256,6631,8192,512,ck,0,0,70.1845,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.55,1656.09,0.0 +gfx950,256,4046,6144,1536,ck,0,0,70.2046,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1087.76,931.12,0.0 +gfx950,256,6650,8192,512,ck,0,0,70.2157,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.47,1659.92,0.0 +gfx950,256,6425,8192,512,ck,0,0,70.2378,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.35,1605.28,0.0 +gfx950,256,6629,8192,512,ck,0,0,70.2541,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.53,1653.97,0.0 +gfx950,256,6628,8192,512,ck,0,0,70.2733,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.19,1653.27,0.0 +gfx950,256,6637,8192,512,ck,0,0,70.3885,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.97,1652.73,0.0 +gfx950,256,6585,8192,512,ck,0,0,70.3913,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.74,1640.18,0.0 +gfx950,256,6635,8192,512,ck,0,0,70.4094,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.5,1651.76,0.0 +gfx950,256,6549,8192,512,ck,0,0,70.4141,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.2,1631.01,0.0 +gfx950,256,6414,8192,512,ck,0,0,70.428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.97,1598.3,0.0 +gfx950,256,6557,8192,512,ck,0,0,70.4609,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.63,1631.85,0.0 +gfx950,256,6405,8192,512,ck,0,0,70.4847,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.28,1594.86,0.0 +gfx950,256,3971,6144,1536,ck,0,0,70.5347,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1062.6,912.07,0.0 +gfx950,256,6633,8192,512,ck,0,0,70.5541,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.64,1647.89,0.0 +gfx950,256,6583,8192,512,ck,0,0,70.6689,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.42,1633.26,0.0 +gfx950,256,6621,8192,512,ck,0,0,70.7912,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.57,1639.51,0.0 +gfx950,256,6636,8192,512,ck,0,0,71.0005,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.03,1638.24,0.0 +gfx950,256,4050,6144,1536,ck,0,0,71.0541,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1075.82,920.77,0.0 +gfx950,256,6713,8192,512,ck,0,0,71.1233,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.76,1653.71,0.0 +gfx950,256,6767,8192,512,ck,0,0,71.2273,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.97,1664.1,0.0 +gfx950,256,6722,8192,512,ck,0,0,71.2321,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.61,1653.32,0.0 +gfx950,256,4000,6144,1536,ck,0,0,71.2391,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1059.78,908.67,0.0 +gfx950,256,6761,8192,512,ck,0,0,71.3293,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.12,1660.3,0.0 +gfx950,256,6757,8192,512,ck,0,0,71.7326,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.18,1650.02,0.0 +gfx950,256,6679,8192,512,ck,0,0,71.7561,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.8,1631.12,0.0 +gfx950,256,6685,8192,512,ck,0,0,71.7629,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.43,1632.38,0.0 +gfx950,256,6720,8192,512,ck,0,0,71.7709,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.44,1640.43,0.0 +gfx950,256,6912,8192,512,ck,0,0,71.7817,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.76,1685.38,0.0 +gfx950,256,6670,8192,512,ck,0,0,71.8001,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.27,1628.0,0.0 +gfx950,256,6717,8192,512,ck,0,0,71.8181,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.57,1638.65,0.0 +gfx950,256,6756,8192,512,ck,0,0,71.8214,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.09,1647.75,0.0 +gfx950,256,6693,8192,512,ck,0,0,71.9377,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.47,1630.29,0.0 +gfx950,256,6530,8192,512,ck,0,0,71.9553,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.27,1591.62,0.0 +gfx950,256,6190,8192,512,ck,18,0,71.9979,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,721.21,1510.88,0.0 +gfx950,256,6673,8192,512,ck,0,0,72.0602,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.81,1622.83,0.0 +gfx950,256,6848,8192,512,ck,0,0,72.1277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.44,1662.3,0.0 +gfx950,256,6723,8192,512,ck,0,0,72.1313,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.86,1632.94,0.0 +gfx950,256,6664,8192,512,ck,0,0,72.1505,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.79,1618.69,0.0 +gfx950,256,6839,8192,512,ck,0,0,72.2085,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.5,1658.34,0.0 +gfx950,256,6888,8192,512,ck,0,0,72.2325,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.93,1669.25,0.0 +gfx950,256,6728,8192,512,ck,0,0,72.2333,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.34,1631.8,0.0 +gfx950,256,6714,8192,512,ck,0,0,72.2385,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.66,1628.41,0.0 +gfx950,256,6691,8192,512,ck,0,0,72.2541,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.82,1622.68,0.0 +gfx950,256,6740,8192,512,ck,0,0,72.2885,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.13,1633.36,0.0 +gfx950,256,6762,8192,512,ck,0,0,72.3313,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.22,1637.54,0.0 +gfx950,256,6682,8192,512,ck,0,0,72.3573,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.67,1618.27,0.0 +gfx950,256,6769,8192,512,ck,0,0,72.3585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.74,1638.55,0.0 +gfx950,256,6869,8192,512,ck,0,0,72.4073,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.79,1660.78,0.0 +gfx950,256,6681,8192,512,ck,0,0,72.4357,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.71,1616.28,0.0 +gfx950,256,6694,8192,512,ck,0,0,72.4661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.89,1618.63,0.0 +gfx950,256,6765,8192,512,ck,0,0,72.4677,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.09,1635.15,0.0 +gfx950,256,6751,8192,512,ck,0,0,72.5022,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.1,1631.11,0.0 +gfx950,256,6707,8192,512,ck,0,0,72.5073,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.95,1620.74,0.0 +gfx950,256,6764,8192,512,ck,0,0,72.5385,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.21,1633.32,0.0 +gfx950,256,6806,8192,512,ck,0,0,72.5597,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.84,1642.63,0.0 +gfx950,256,6668,8192,512,ck,0,0,72.5687,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.79,1610.29,0.0 +gfx950,256,6672,8192,512,ck,0,0,72.574,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.2,1611.11,0.0 +gfx950,256,6706,8192,512,ck,0,0,72.5814,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.05,1618.86,0.0 +gfx950,256,6768,8192,512,ck,0,0,72.6129,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.87,1632.58,0.0 +gfx950,256,6854,8192,512,ck,0,0,72.6293,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.63,1652.22,0.0 +gfx950,256,6725,8192,512,ck,0,0,72.6593,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.41,1621.54,0.0 +gfx950,256,6897,8192,512,ck,0,0,72.6643,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.21,1661.42,0.0 +gfx950,256,6800,8192,512,ck,0,0,72.7259,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.35,1637.48,0.0 +gfx950,256,6825,8192,512,ck,0,0,72.7409,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.07,1642.95,0.0 +gfx950,256,6669,8192,512,ck,0,0,72.7858,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.61,1605.72,0.0 +gfx950,256,6388,8192,512,ck,0,0,72.7953,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,736.12,1540.29,0.0 +gfx950,256,6905,8192,512,ck,0,0,72.8586,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.01,1658.85,0.0 +gfx950,256,6715,8192,512,ck,0,0,72.9641,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.02,1612.45,0.0 +gfx950,256,6826,8192,512,ck,0,0,72.9645,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.77,1638.14,0.0 +gfx950,256,6943,8192,512,ck,0,0,72.9713,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.15,1665.08,0.0 +gfx950,256,6678,8192,512,ck,0,0,72.9729,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.67,1603.69,0.0 +gfx950,256,6781,8192,512,ck,0,0,72.9733,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.51,1627.53,0.0 +gfx950,256,6808,8192,512,ck,0,0,72.9759,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.58,1633.72,0.0 +gfx950,256,6816,8192,512,ck,0,0,72.9825,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.43,1635.43,0.0 +gfx950,256,6820,8192,512,ck,0,0,73.0142,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.55,1635.64,0.0 +gfx950,256,6710,8192,512,ck,0,0,73.0966,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.04,1608.37,0.0 +gfx950,256,6890,8192,512,ck,0,0,73.1569,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.05,1648.62,0.0 +gfx950,256,6973,8192,512,ck,0,0,73.1841,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.27,1667.17,0.0 +gfx950,256,6838,8192,512,ck,0,0,73.1907,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.72,1635.85,0.0 +gfx950,256,6846,8192,512,ck,0,0,73.2039,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.5,1637.4,0.0 +gfx950,256,7031,8192,512,ck,0,0,73.2353,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.35,1679.38,0.0 +gfx950,256,6807,8192,512,ck,0,0,73.2441,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.6,1627.51,0.0 +gfx950,256,7022,8192,512,ck,0,0,73.2582,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.07,1676.78,0.0 +gfx950,256,6985,8192,512,ck,0,0,73.2981,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.4,1667.34,0.0 +gfx950,256,6922,8192,512,ck,0,0,73.302,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.15,1652.73,0.0 +gfx950,256,6851,8192,512,ck,0,0,73.3159,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.87,1636.05,0.0 +gfx950,256,6956,8192,512,ck,0,0,73.3368,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.66,1659.78,0.0 +gfx950,256,7061,8192,512,ck,0,0,73.3436,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.6,1683.81,0.0 +gfx950,256,6774,8192,512,ck,0,0,73.3708,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.48,1617.1,0.0 +gfx950,256,6898,8192,512,ck,0,0,73.4272,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.05,1644.39,0.0 +gfx950,256,7018,8192,512,ck,0,0,73.432,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.71,1671.89,0.0 +gfx950,256,7029,8192,512,ck,0,0,73.4486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.79,1674.05,0.0 +gfx950,256,6778,8192,512,ck,0,0,73.4585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.02,1616.09,0.0 +gfx950,256,7024,8192,512,ck,0,0,73.5318,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.31,1671.0,0.0 +gfx950,256,6960,8192,512,ck,0,0,73.5437,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.88,1656.03,0.0 +gfx950,256,6881,8192,512,ck,0,0,73.5697,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.59,1637.3,0.0 +gfx950,256,7037,8192,512,ck,0,0,73.6166,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.87,1672.06,0.0 +gfx950,256,7030,8192,512,ck,0,0,73.6358,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.86,1670.02,0.0 +gfx950,256,6920,8192,512,ck,0,0,73.6373,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.31,1644.75,0.0 +gfx950,256,7015,8192,512,ck,0,0,73.6625,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.86,1665.97,0.0 +gfx950,256,6949,8192,512,ck,0,0,73.7161,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.77,1649.63,0.0 +gfx950,256,6874,8192,512,ck,0,0,73.7229,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.16,1632.29,0.0 +gfx950,256,6939,8192,512,ck,0,0,73.7521,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.25,1646.54,0.0 +gfx950,256,7153,8192,512,ck,0,0,73.809,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.96,1694.26,0.0 +gfx950,256,7039,8192,512,ck,0,0,73.8194,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.89,1667.93,0.0 +gfx950,256,6916,8192,512,ck,0,0,73.8573,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.51,1638.93,0.0 +gfx950,256,6886,8192,512,ck,0,0,73.8828,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.83,1631.51,0.0 +gfx950,256,6953,8192,512,ck,0,0,73.9021,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.23,1646.4,0.0 +gfx950,256,6810,8192,512,ck,0,0,73.9185,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.83,1613.35,0.0 +gfx950,256,6982,8192,512,ck,0,0,73.9197,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.34,1652.63,0.0 +gfx950,256,7023,8192,512,ck,0,0,73.9274,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.91,1661.83,0.0 +gfx950,256,7016,8192,512,ck,0,0,73.946,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.91,1659.81,0.0 +gfx950,256,6864,8192,512,ck,0,0,74.0069,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.03,1623.75,0.0 +gfx950,256,7072,8192,512,ck,0,0,74.0613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.02,1670.01,0.0 +gfx950,256,6861,8192,512,ck,0,0,74.1129,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.58,1620.74,0.0 +gfx950,256,7118,8192,512,ck,0,0,74.1234,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.55,1679.09,0.0 +gfx950,256,4096,6144,1536,ck,0,0,74.1463,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1042.66,890.95,0.0 +gfx950,256,7034,8192,512,ck,0,0,74.1634,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.61,1659.05,0.0 +gfx950,256,6836,8192,512,ck,0,0,74.1649,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.2,1613.91,0.0 +gfx950,256,6930,8192,512,ck,0,0,74.1904,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.57,1634.76,0.0 +gfx950,256,7065,8192,512,ck,0,0,74.1934,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.8,1665.44,0.0 +gfx950,256,6959,8192,512,ck,0,0,74.2189,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.54,1640.74,0.0 +gfx950,256,7162,8192,512,ck,0,0,74.2197,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,809.48,1686.93,0.0 +gfx950,256,6885,8192,512,ck,0,0,74.2365,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.99,1623.5,0.0 +gfx950,256,7107,8192,512,ck,0,0,74.2449,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.99,1673.84,0.0 +gfx950,256,6932,8192,512,ck,0,0,74.2652,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.0,1633.57,0.0 +gfx950,256,7115,8192,512,ck,0,0,74.2833,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.48,1674.8,0.0 +gfx950,256,7167,8192,512,ck,0,0,74.2833,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,809.35,1686.62,0.0 +gfx950,256,7025,8192,512,ck,0,0,74.2877,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.27,1654.23,0.0 +gfx950,256,6941,8192,512,ck,0,0,74.2976,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.68,1634.9,0.0 +gfx950,256,7089,8192,512,ck,0,0,74.303,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.33,1668.44,0.0 +gfx950,256,7149,8192,512,ck,0,0,74.3266,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.85,1681.55,0.0 +gfx950,256,7110,8192,512,ck,0,0,74.333,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.38,1672.54,0.0 +gfx950,256,7151,8192,512,ck,0,0,74.4406,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.84,1679.43,0.0 +gfx950,256,7157,8192,512,ck,0,0,74.4477,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.44,1680.63,0.0 +gfx950,256,7036,8192,512,ck,0,0,74.4582,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.69,1652.93,0.0 +gfx950,256,6871,8192,512,ck,0,0,74.4891,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.78,1614.82,0.0 +gfx950,256,6661,8192,512,ck,0,0,74.5194,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.83,1566.55,0.0 +gfx950,256,7074,8192,512,ck,0,0,74.5202,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.31,1660.18,0.0 +gfx950,256,6986,8192,512,ck,0,0,74.5476,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.11,1639.62,0.0 +gfx950,256,6883,8192,512,ck,0,0,74.571,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.28,1615.77,0.0 +gfx950,256,7020,8192,512,ck,0,0,74.5986,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.4,1646.2,0.0 +gfx950,256,7155,8192,512,ck,0,0,74.5994,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.57,1676.76,0.0 +gfx950,256,6786,8192,512,ck,0,0,74.6099,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.97,1592.96,0.0 +gfx950,256,6921,8192,512,ck,0,0,74.6304,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.93,1623.09,0.0 +gfx950,256,6981,8192,512,ck,0,0,74.6393,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.58,1636.47,0.0 +gfx950,256,6809,8192,512,ck,0,0,74.6442,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.2,1597.43,0.0 +gfx950,256,7156,8192,512,ck,0,0,74.657,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.06,1675.69,0.0 +gfx950,256,7021,8192,512,ck,0,0,74.6798,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.65,1644.64,0.0 +gfx950,256,6945,8192,512,ck,0,0,74.7133,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.77,1626.71,0.0 +gfx950,256,7111,8192,512,ck,0,0,74.7321,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.2,1663.83,0.0 +gfx950,256,7069,8192,512,ck,0,0,74.7622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.17,1653.67,0.0 +gfx950,256,6599,8192,512,ck,0,0,74.7635,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,740.42,1547.43,0.0 +gfx950,256,7113,8192,512,ck,0,0,74.7705,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.02,1663.43,0.0 +gfx950,256,7092,8192,512,ck,0,0,74.7709,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.66,1658.68,0.0 +gfx950,256,7126,8192,512,ck,0,0,74.7806,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.37,1666.14,0.0 +gfx950,256,6942,8192,512,ck,0,0,74.8273,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.24,1623.56,0.0 +gfx950,256,6996,8192,512,ck,0,0,74.8365,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.2,1635.55,0.0 +gfx950,256,7095,8192,512,ck,0,0,74.8417,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.24,1657.78,0.0 +gfx950,256,6812,8192,512,ck,0,0,74.852,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.42,1593.68,0.0 +gfx950,256,7075,8192,512,ck,0,0,74.864,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.76,1652.78,0.0 +gfx950,256,7090,8192,512,ck,0,0,74.8845,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.23,1655.71,0.0 +gfx950,256,6976,8192,512,ck,0,0,74.8945,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.35,1629.77,0.0 +gfx950,256,6811,8192,512,ck,0,0,74.916,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.65,1592.09,0.0 +gfx950,256,7165,8192,512,ck,0,0,74.9317,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.12,1671.58,0.0 +gfx950,256,7066,8192,512,ck,0,0,74.9345,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.01,1649.19,0.0 +gfx950,256,6979,8192,512,ck,0,0,74.9661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.94,1628.89,0.0 +gfx950,256,7007,8192,512,ck,0,0,74.9999,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.72,1634.46,0.0 +gfx950,256,6929,8192,512,ck,0,0,75.0273,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.71,1616.3,0.0 +gfx950,256,4108,6144,1536,ck,0,0,75.1006,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1032.43,881.83,0.0 +gfx950,256,6915,8192,512,ck,0,0,75.1089,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.31,1611.4,0.0 +gfx950,256,7144,8192,512,ck,0,0,75.1134,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.84,1662.81,0.0 +gfx950,256,7049,8192,512,ck,0,0,75.149,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.85,1640.66,0.0 +gfx950,256,7159,8192,512,ck,0,0,75.196,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.63,1664.35,0.0 +gfx950,256,7163,8192,512,ck,0,0,75.2221,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.8,1664.68,0.0 +gfx950,256,7148,8192,512,ck,0,0,75.2293,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.05,1661.15,0.0 +gfx950,256,7108,8192,512,ck,0,0,75.2374,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.51,1651.99,0.0 +gfx950,256,6978,8192,512,ck,0,0,75.2492,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.89,1622.54,0.0 +gfx950,256,7076,8192,512,ck,0,0,75.3094,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.19,1643.23,0.0 +gfx950,256,7102,8192,512,ck,0,0,75.4005,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.13,1647.07,0.0 +gfx950,256,7114,8192,512,ck,0,0,75.4206,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.25,1649.32,0.0 +gfx950,256,4112,6144,1536,ck,0,0,75.4754,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1028.3,878.19,0.0 +gfx950,256,6813,8192,512,ck,0,0,75.5113,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.86,1579.99,0.0 +gfx950,256,7164,8192,512,ck,0,0,75.5225,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.74,1658.28,0.0 +gfx950,256,7097,8192,512,ck,0,0,75.6181,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.3,1641.21,0.0 +gfx950,256,4107,6144,1536,ck,0,0,75.6397,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1024.82,875.37,0.0 +gfx950,256,7063,8192,512,ck,0,0,75.6754,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.93,1632.38,0.0 +gfx950,256,7232,8192,512,ck,0,0,75.6889,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.52,1669.81,0.0 +gfx950,256,7131,8192,512,ck,0,0,75.7098,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.11,1646.81,0.0 +gfx950,256,7058,8192,512,ck,0,0,75.7166,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.95,1630.37,0.0 +gfx950,256,1185,7168,4096,ck,0,0,75.8547,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,917.33,675.0,0.0 +gfx950,256,7009,8192,512,ck,0,0,75.9418,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.22,1614.64,0.0 +gfx950,256,7121,8192,512,ck,0,0,76.0022,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.97,1638.25,0.0 +gfx950,256,4252,6144,1536,ck,0,0,76.025,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1055.62,897.29,0.0 +gfx950,256,7041,8192,512,ck,0,0,76.2741,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.37,1614.69,0.0 +gfx950,256,7000,8192,512,ck,0,0,76.2773,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.83,1605.54,0.0 +gfx950,256,7213,8192,512,ck,0,0,76.3153,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.86,1651.9,0.0 +gfx950,256,7237,8192,512,ck,0,0,76.3905,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.71,1655.58,0.0 +gfx950,256,4244,6144,1536,ck,0,0,76.401,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1048.45,891.43,0.0 +gfx950,256,4245,6144,1536,ck,0,0,76.4018,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1048.69,891.6,0.0 +gfx950,256,7199,8192,512,ck,0,0,76.4167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.27,1646.61,0.0 +gfx950,256,7369,8192,512,ck,0,0,76.4333,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.75,1683.83,0.0 +gfx950,256,7285,8192,512,ck,0,0,76.4798,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.05,1664.25,0.0 +gfx950,256,4123,6144,1536,ck,0,0,76.5018,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1017.22,868.39,0.0 +gfx950,256,7222,8192,512,ck,0,0,76.5029,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.9,1649.84,0.0 +gfx950,256,7205,8192,512,ck,0,0,76.5221,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.84,1645.67,0.0 +gfx950,256,7184,8192,512,ck,0,0,76.5405,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.34,1640.64,0.0 +gfx950,256,7191,8192,512,ck,0,0,76.5657,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.85,1641.64,0.0 +gfx950,256,7206,8192,512,ck,0,0,76.6045,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.1,1644.12,0.0 +gfx950,256,7215,8192,512,ck,0,0,76.6365,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.75,1645.42,0.0 +gfx950,256,4180,6144,1536,ck,0,0,76.6411,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1029.41,877.09,0.0 +gfx950,256,7256,8192,512,ck,0,0,76.6431,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.17,1654.31,0.0 +gfx950,256,1255,7168,4096,ck,0,0,76.6443,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,961.51,684.88,0.0 +gfx950,256,1265,7168,4096,ck,0,0,76.6454,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,969.15,687.28,0.0 +gfx950,256,7220,8192,512,ck,0,0,76.7274,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.36,1644.57,0.0 +gfx950,256,7393,8192,512,ck,0,0,76.7591,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.94,1681.97,0.0 +gfx950,256,4176,6144,1536,ck,0,0,76.7635,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1026.78,874.98,0.0 +gfx950,256,464,9216,7168,ck,18,0,76.7809,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,798.43,1015.08,0.0 +gfx950,256,7356,8192,512,ck,0,0,76.8211,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.25,1672.47,0.0 +gfx950,256,7283,8192,512,ck,0,0,76.8669,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.81,1655.43,0.0 +gfx950,256,7293,8192,512,ck,0,0,76.873,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.83,1657.5,0.0 +gfx950,256,4484,6144,1536,ck,0,0,76.8787,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1100.86,929.05,0.0 +gfx950,256,3258,8192,1536,ck,0,0,76.881,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1066.46,923.07,0.0 +gfx950,256,7329,8192,512,ck,0,0,76.8822,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.67,1665.21,0.0 +gfx950,256,7279,8192,512,ck,0,0,76.8906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.12,1654.04,0.0 +gfx950,256,7265,8192,512,ck,0,0,76.9039,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.46,1650.68,0.0 +gfx950,256,7218,8192,512,ck,0,0,76.9071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.3,1640.29,0.0 +gfx950,256,7372,8192,512,ck,0,0,76.9129,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.04,1673.99,0.0 +gfx950,256,7231,8192,512,ck,0,0,76.9569,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.21,1642.08,0.0 +gfx950,256,7183,8192,512,ck,0,0,76.969,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.85,1631.28,0.0 +gfx950,256,4294,6144,1536,ck,0,0,76.9962,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1052.6,893.52,0.0 +gfx950,256,7339,8192,512,ck,0,0,77.0805,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.7,1663.12,0.0 +gfx950,256,7364,8192,512,ck,0,0,77.0846,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.38,1668.51,0.0 +gfx950,256,7420,8192,512,ck,0,0,77.0859,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.46,1680.76,0.0 +gfx950,256,7385,8192,512,ck,0,0,77.0937,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.57,1672.92,0.0 +gfx950,256,4359,6144,1536,ck,0,0,77.1162,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1066.88,903.78,0.0 +gfx950,256,7230,8192,512,ck,0,0,77.1213,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.42,1638.36,0.0 +gfx950,256,7354,8192,512,ck,0,0,77.1383,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.73,1665.16,0.0 +gfx950,256,7412,8192,512,ck,0,0,77.1585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.83,1677.42,0.0 +gfx950,256,7307,8192,512,ck,0,0,77.1764,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.23,1654.05,0.0 +gfx950,256,7326,8192,512,ck,0,0,77.1773,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.28,1658.19,0.0 +gfx950,256,7235,8192,512,ck,0,0,77.2085,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.07,1637.6,0.0 +gfx950,256,7303,8192,512,ck,0,0,77.2331,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.21,1651.96,0.0 +gfx950,256,7408,8192,512,ck,0,0,77.2481,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.46,1674.6,0.0 +gfx950,256,7337,8192,512,ck,0,0,77.2502,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.73,1659.03,0.0 +gfx950,256,7196,8192,512,ck,0,0,77.2577,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.34,1628.03,0.0 +gfx950,256,7417,8192,512,ck,0,0,77.2593,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.32,1676.33,0.0 +gfx950,256,7403,8192,512,ck,0,0,77.2646,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.74,1673.15,0.0 +gfx950,256,7680,8192,512,ck,0,0,77.3079,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.35,1732.75,0.0 +gfx950,256,7314,8192,512,ck,0,0,77.3111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.6,1652.69,0.0 +gfx950,256,7207,8192,512,ck,0,0,77.3558,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.54,1628.37,0.0 +gfx950,256,7477,8192,512,ck,0,0,77.3924,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.44,1686.54,0.0 +gfx950,256,7306,8192,512,ck,0,0,77.3994,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.83,1649.06,0.0 +gfx950,256,7422,8192,512,ck,0,0,77.4694,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.68,1672.87,0.0 +gfx950,256,7423,8192,512,ck,0,0,77.4866,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.61,1672.72,0.0 +gfx950,256,7330,8192,512,ck,0,0,77.4963,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.44,1652.23,0.0 +gfx950,256,7249,8192,512,ck,0,0,77.5098,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.53,1634.29,0.0 +gfx950,256,7321,8192,512,ck,0,0,77.5181,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.24,1649.81,0.0 +gfx950,256,7371,8192,512,ck,0,0,77.529,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.54,1660.47,0.0 +gfx950,256,7172,8192,512,ck,0,0,77.5424,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.87,1616.82,0.0 +gfx950,256,7203,8192,512,ck,0,0,77.6014,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.63,1622.34,0.0 +gfx950,256,7375,8192,512,ck,0,0,77.6599,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.63,1658.54,0.0 +gfx950,256,7309,8192,512,ck,0,0,77.6761,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.33,1643.84,0.0 +gfx950,256,7272,8192,512,ck,0,0,77.6779,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.32,1635.76,0.0 +gfx950,256,7241,8192,512,ck,0,0,77.7,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.75,1628.55,0.0 +gfx950,256,7483,8192,512,ck,0,0,77.7017,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.86,1681.14,0.0 +gfx950,256,7379,8192,512,ck,0,0,77.7023,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.62,1658.51,0.0 +gfx950,256,7317,8192,512,ck,0,0,77.7052,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.9,1644.97,0.0 +gfx950,256,7511,8192,512,ck,0,0,77.7061,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.84,1687.13,0.0 +gfx950,256,4504,6144,1536,ck,0,0,77.7066,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1093.99,922.71,0.0 +gfx950,256,7432,8192,512,ck,0,0,77.7111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.26,1669.84,0.0 +gfx950,256,7242,8192,512,ck,0,0,77.7218,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.64,1628.31,0.0 +gfx950,256,7405,8192,512,ck,0,0,77.7459,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.98,1663.23,0.0 +gfx950,256,7312,8192,512,ck,0,0,77.7546,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.86,1642.83,0.0 +gfx950,256,7552,8192,512,ck,0,0,77.7589,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,814.71,1694.89,0.0 +gfx950,256,7323,8192,512,ck,0,0,77.7854,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.73,1644.57,0.0 +gfx950,256,7324,8192,512,ck,0,0,77.7877,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.82,1644.74,0.0 +gfx950,256,7476,8192,512,ck,0,0,77.8178,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.9,1677.11,0.0 +gfx950,256,4498,6144,1536,ck,0,0,77.8287,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1090.82,920.19,0.0 +gfx950,256,7467,8192,512,ck,0,0,77.8398,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.7,1674.68,0.0 +gfx950,256,7439,8192,512,ck,0,0,77.8506,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.57,1668.37,0.0 +gfx950,256,7508,8192,512,ck,0,0,77.871,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.79,1682.9,0.0 +gfx950,256,7546,8192,512,ck,0,0,77.9079,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.5,1690.35,0.0 +gfx950,256,7335,8192,512,ck,0,0,77.9234,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.63,1644.26,0.0 +gfx950,256,7452,8192,512,ck,0,0,77.9429,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.02,1669.21,0.0 +gfx950,256,7421,8192,512,ck,0,0,77.9554,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.56,1662.23,0.0 +gfx950,256,7277,8192,512,ck,0,0,77.9759,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.86,1630.59,0.0 +gfx950,256,4597,6144,1536,ck,0,0,78.0162,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1112.15,935.53,0.0 +gfx950,256,7490,8192,512,ck,0,0,78.0378,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.13,1675.41,0.0 +gfx950,256,1334,7168,4096,ck,0,0,78.0403,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1003.75,691.29,0.0 +gfx950,256,7418,8192,512,ck,0,0,78.0455,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.31,1659.66,0.0 +gfx950,256,7668,8192,512,ck,0,0,78.063,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.0,1713.4,0.0 +gfx950,256,1333,7168,4096,ck,0,0,78.0782,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1002.51,690.72,0.0 +gfx950,256,7325,8192,512,ck,0,0,78.0799,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.97,1638.8,0.0 +gfx950,256,7475,8192,512,ck,0,0,78.0915,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.97,1671.01,0.0 +gfx950,256,7614,8192,512,ck,0,0,78.1063,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.74,1700.76,0.0 +gfx950,256,7460,8192,512,ck,0,0,78.1143,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.12,1667.28,0.0 +gfx950,256,7336,8192,512,ck,0,0,78.1266,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.68,1640.2,0.0 +gfx950,256,4524,6144,1536,ck,0,0,78.155,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1092.54,920.95,0.0 +gfx950,256,7486,8192,512,ck,0,0,78.1564,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.48,1672.0,0.0 +gfx950,256,7353,8192,512,ck,0,0,78.2013,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.75,1642.31,0.0 +gfx950,256,4538,6144,1536,ck,0,0,78.2022,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1095.26,922.87,0.0 +gfx950,256,7370,8192,512,ck,0,0,78.2487,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.1,1644.98,0.0 +gfx950,256,7413,8192,512,ck,0,0,78.249,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.7,1654.26,0.0 +gfx950,256,7234,8192,512,ck,0,0,78.2682,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.32,1615.21,0.0 +gfx950,256,7550,8192,512,ck,0,0,78.2729,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,809.14,1683.33,0.0 +gfx950,256,7416,8192,512,ck,0,0,78.3067,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.44,1653.69,0.0 +gfx950,256,7210,8192,512,ck,0,0,78.313,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.31,1609.11,0.0 +gfx950,256,7398,8192,512,ck,0,0,78.321,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.37,1649.51,0.0 +gfx950,256,7334,8192,512,ck,0,0,78.3311,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.41,1635.49,0.0 +gfx950,256,7520,8192,512,ck,0,0,78.4355,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.26,1673.38,0.0 +gfx950,256,7505,8192,512,ck,0,0,78.4371,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.64,1670.11,0.0 +gfx950,256,7480,8192,512,ck,0,0,78.4381,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.95,1664.71,0.0 +gfx950,256,7377,8192,512,ck,0,0,78.4931,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.38,1641.37,0.0 +gfx950,256,7537,8192,512,ck,0,0,78.5053,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.36,1675.55,0.0 +gfx950,256,7316,8192,512,ck,0,0,78.511,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.69,1627.87,0.0 +gfx950,256,7662,8192,512,ck,0,0,78.5262,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.5,1702.0,0.0 +gfx950,256,7458,8192,512,ck,0,0,78.5304,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.66,1658.02,0.0 +gfx950,256,7366,8192,512,ck,0,0,78.5911,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.23,1636.96,0.0 +gfx950,256,7473,8192,512,ck,0,0,78.6007,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.55,1659.76,0.0 +gfx950,256,7626,8192,512,ck,0,0,78.6367,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.51,1691.87,0.0 +gfx950,256,7659,8192,512,ck,0,0,78.6443,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.95,1698.8,0.0 +gfx950,256,7515,8192,512,ck,0,0,78.7127,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.89,1666.41,0.0 +gfx950,256,7628,8192,512,ck,0,0,78.7256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.8,1690.39,0.0 +gfx950,256,7500,8192,512,ck,0,0,78.732,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.1,1662.78,0.0 +gfx950,256,496,9216,7168,ck,18,0,78.7393,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,832.26,1000.24,0.0 +gfx950,256,7469,8192,512,ck,0,0,78.7483,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.63,1655.79,0.0 +gfx950,256,7637,8192,512,ck,0,0,78.7552,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.45,1691.69,0.0 +gfx950,256,7488,8192,512,ck,0,0,78.761,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.53,1659.6,0.0 +gfx950,256,7676,8192,512,ck,0,0,78.787,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.28,1699.37,0.0 +gfx950,256,7409,8192,512,ck,0,0,78.7906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.81,1642.03,0.0 +gfx950,256,7524,8192,512,ck,0,0,78.8269,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.69,1665.93,0.0 +gfx950,256,7530,8192,512,ck,0,0,78.843,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.16,1666.87,0.0 +gfx950,256,7169,8192,512,ck,0,0,78.873,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.47,1588.91,0.0 +gfx950,256,7501,8192,512,ck,0,0,78.8897,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.61,1659.67,0.0 +gfx950,256,7525,8192,512,ck,0,0,78.9086,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.97,1664.42,0.0 +gfx950,256,7457,8192,512,ck,0,0,78.9094,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.73,1649.84,0.0 +gfx950,256,7594,8192,512,ck,0,0,78.9428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.95,1678.46,0.0 +gfx950,256,4522,6144,1536,ck,0,0,78.9583,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1080.95,911.23,0.0 +gfx950,256,7542,8192,512,ck,0,0,78.9604,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.25,1666.96,0.0 +gfx950,256,7646,8192,512,ck,0,0,78.9622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.28,1689.18,0.0 +gfx950,256,7651,8192,512,ck,0,0,78.9624,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.81,1690.24,0.0 +gfx950,256,4663,6144,1536,ck,0,0,78.9642,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1114.57,935.85,0.0 +gfx950,256,7671,8192,512,ck,0,0,78.9761,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,814.79,1694.23,0.0 +gfx950,256,7526,8192,512,ck,0,0,79.0071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.08,1662.55,0.0 +gfx950,256,7459,8192,512,ck,0,0,79.0139,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.89,1648.08,0.0 +gfx950,256,7535,8192,512,ck,0,0,79.0167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.93,1664.28,0.0 +gfx950,256,4598,6144,1536,ck,0,0,79.0362,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1098.03,923.63,0.0 +gfx950,256,7602,8192,512,ck,0,0,79.0396,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.81,1678.12,0.0 +gfx950,256,7601,8192,512,ck,0,0,79.0452,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.65,1677.78,0.0 +gfx950,256,7495,8192,512,ck,0,0,79.0621,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.23,1654.77,0.0 +gfx950,256,4507,6144,1536,ck,0,0,79.0727,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1075.8,907.29,0.0 +gfx950,256,7598,8192,512,ck,0,0,79.1145,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.63,1675.67,0.0 +gfx950,256,7464,8192,512,ck,0,0,79.1677,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.89,1645.95,0.0 +gfx950,256,7627,8192,512,ck,0,0,79.1683,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.15,1680.72,0.0 +gfx950,256,3451,8192,1536,ck,0,0,79.1891,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1096.71,939.84,0.0 +gfx950,256,7599,8192,512,ck,0,0,79.1932,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.93,1674.22,0.0 +gfx950,256,7427,8192,512,ck,0,0,79.2303,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.34,1636.76,0.0 +gfx950,256,7563,8192,512,ck,0,0,79.2648,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.39,1665.04,0.0 +gfx950,256,7465,8192,512,ck,0,0,79.2715,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.96,1644.01,0.0 +gfx950,256,7560,8192,512,ck,0,0,79.292,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.8,1663.83,0.0 +gfx950,256,7611,8192,512,ck,0,0,79.302,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.1,1674.48,0.0 +gfx950,256,7556,8192,512,ck,0,0,79.3482,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.81,1661.8,0.0 +gfx950,256,7351,8192,512,ck,0,0,79.3583,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.04,1617.94,0.0 +gfx950,256,7656,8192,512,ck,0,0,79.4242,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.61,1681.48,0.0 +gfx950,256,7604,8192,512,ck,0,0,79.4369,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.99,1670.15,0.0 +gfx950,256,7607,8192,512,ck,0,0,79.4428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.25,1670.66,0.0 +gfx950,256,7522,8192,512,ck,0,0,79.4523,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.18,1652.39,0.0 +gfx950,256,7633,8192,512,ck,0,0,79.4604,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.81,1675.82,0.0 +gfx950,256,7634,8192,512,ck,0,0,79.4996,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.52,1675.21,0.0 +gfx950,256,7478,8192,512,ck,0,0,79.5477,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.58,1641.06,0.0 +gfx950,256,7649,8192,512,ck,0,0,79.5639,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.45,1677.04,0.0 +gfx950,256,7630,8192,512,ck,0,0,79.579,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.3,1672.69,0.0 +gfx950,256,7618,8192,512,ck,0,0,79.7008,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.8,1667.59,0.0 +gfx950,256,7624,8192,512,ck,0,0,79.7121,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.32,1668.62,0.0 +gfx950,256,7663,8192,512,ck,0,0,79.7294,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.25,1676.53,0.0 +gfx950,256,7297,8192,512,ck,0,0,79.8898,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.2,1595.75,0.0 +gfx950,256,7561,8192,512,ck,0,0,80.0224,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.61,1648.85,0.0 +gfx950,256,7673,8192,512,ck,0,0,80.0259,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.31,1672.42,0.0 +gfx950,256,7592,8192,512,ck,0,0,80.0465,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.62,1654.9,0.0 +gfx950,256,7577,8192,512,ck,0,0,80.2477,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.05,1647.59,0.0 +gfx950,256,7806,8192,512,ck,0,0,80.5543,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.89,1689.35,0.0 +gfx950,256,7539,8192,512,ck,0,0,80.7652,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.03,1629.08,0.0 +gfx950,256,7700,8192,512,ck,0,0,80.9405,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.02,1659.16,0.0 +gfx950,256,7772,8192,512,ck,0,0,80.9927,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.96,1673.11,0.0 +gfx950,256,7729,8192,512,ck,0,0,81.0246,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.2,1663.49,0.0 +gfx950,256,7553,8192,512,ck,0,0,81.0622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.61,1626.03,0.0 +gfx950,256,7764,8192,512,ck,0,0,81.0664,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.41,1669.93,0.0 +gfx950,256,7727,8192,512,ck,0,0,81.073,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.51,1662.08,0.0 +gfx950,256,7768,8192,512,ck,0,0,81.0964,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.52,1670.14,0.0 +gfx950,256,7807,8192,512,ck,0,0,81.1006,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.51,1678.18,0.0 +gfx950,256,1185,7168,4608,ck,0,0,81.1467,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,964.69,683.69,0.0 +gfx950,256,7758,8192,512,ck,0,0,81.2416,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.05,1665.08,0.0 +gfx950,256,7696,8192,512,ck,0,0,81.2577,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.49,1651.85,0.0 +gfx950,256,7698,8192,512,ck,0,0,81.2669,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.61,1652.08,0.0 +gfx950,256,1490,7168,4096,ck,0,0,81.2908,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1076.3,699.02,0.0 +gfx950,256,1255,7168,4608,ck,0,0,81.3074,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1019.66,698.64,0.0 +gfx950,256,7803,8192,512,ck,0,0,81.3462,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.66,1672.28,0.0 +gfx950,256,1265,7168,4608,ck,0,0,81.3629,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1027.08,700.49,0.0 +gfx950,256,7726,8192,512,ck,0,0,81.3938,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.26,1655.32,0.0 +gfx950,256,7737,8192,512,ck,0,0,81.5748,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.62,1653.93,0.0 +gfx950,256,7708,8192,512,ck,0,0,81.5818,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.57,1647.78,0.0 +gfx950,256,7797,8192,512,ck,0,0,81.6102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.44,1665.63,0.0 +gfx950,256,7756,8192,512,ck,0,0,81.6238,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.1,1656.87,0.0 +gfx950,256,7749,8192,512,ck,0,0,81.653,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.09,1654.82,0.0 +gfx950,256,7730,8192,512,ck,0,0,81.6984,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.7,1649.98,0.0 +gfx950,256,7695,8192,512,ck,0,0,81.7967,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.16,1640.76,0.0 +gfx950,256,7762,8192,512,ck,0,0,81.797,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.02,1654.6,0.0 +gfx950,256,1485,7168,4096,ck,0,0,81.8248,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1065.69,693.33,0.0 +gfx950,256,7712,8192,512,ck,0,0,81.8596,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.29,1643.01,0.0 +gfx950,256,7790,8192,512,ck,0,0,81.8914,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.97,1658.47,0.0 +gfx950,256,7770,8192,512,ck,0,0,81.9094,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.75,1653.98,0.0 +gfx950,256,7688,8192,512,ck,0,0,81.9632,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.84,1635.99,0.0 +gfx950,256,7703,8192,512,ck,0,0,82.023,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.8,1637.88,0.0 +gfx950,256,7753,8192,512,ck,0,0,82.0386,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.76,1647.87,0.0 +gfx950,256,7720,8192,512,ck,0,0,82.1658,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.16,1638.53,0.0 +gfx950,256,7738,8192,512,ck,0,0,82.2186,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.49,1641.18,0.0 +gfx950,256,1517,7168,4096,ck,0,0,82.3083,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1082.26,696.42,0.0 +gfx950,256,7748,8192,512,ck,0,0,82.3198,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.54,1641.22,0.0 +gfx950,256,7847,8192,512,ck,0,0,82.3356,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.48,1661.22,0.0 +gfx950,256,7766,8192,512,ck,0,0,82.4192,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.42,1642.93,0.0 +gfx950,256,7682,8192,512,ck,0,0,82.4298,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.77,1625.5,0.0 +gfx950,256,7904,8192,512,ck,0,0,82.5012,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.67,1669.55,0.0 +gfx950,256,7888,8192,512,ck,0,0,82.5444,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.62,1665.41,0.0 +gfx950,256,7879,8192,512,ck,0,0,82.6232,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.94,1661.98,0.0 +gfx950,256,7863,8192,512,ck,0,0,82.6468,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.09,1658.23,0.0 +gfx950,256,7718,8192,512,ck,0,0,82.6918,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.95,1627.7,0.0 +gfx950,256,7724,8192,512,ck,0,0,82.7178,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.31,1628.42,0.0 +gfx950,256,7909,8192,512,ck,0,0,82.7184,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.06,1666.19,0.0 +gfx950,256,7899,8192,512,ck,0,0,82.7342,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.9,1663.83,0.0 +gfx950,256,7902,8192,512,ck,0,0,82.752,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.03,1664.09,0.0 +gfx950,256,7854,8192,512,ck,0,0,82.8032,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.67,1653.26,0.0 +gfx950,256,7810,8192,512,ck,0,0,82.8144,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.11,1644.06,0.0 +gfx950,256,7811,8192,512,ck,0,0,82.8156,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.2,1644.24,0.0 +gfx950,256,7721,8192,512,ck,0,0,82.852,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.74,1625.17,0.0 +gfx950,256,7755,8192,512,ck,0,0,82.8576,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.13,1631.99,0.0 +gfx950,256,7765,8192,512,ck,0,0,82.8613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.1,1633.96,0.0 +gfx950,256,7886,8192,512,ck,0,0,82.8718,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.25,1658.42,0.0 +gfx950,256,7761,8192,512,ck,0,0,82.8908,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.42,1632.56,0.0 +gfx950,256,7898,8192,512,ck,0,0,82.9168,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.03,1659.96,0.0 +gfx950,256,7881,8192,512,ck,0,0,82.9236,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.25,1656.36,0.0 +gfx950,256,7454,8192,512,ck,0,0,82.9438,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.87,1568.98,0.0 +gfx950,256,7681,8192,512,ck,0,0,83.012,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.19,1613.89,0.0 +gfx950,256,7930,8192,512,ck,0,0,83.0308,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.17,1664.2,0.0 +gfx950,256,7880,8192,512,ck,0,0,83.1034,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.42,1652.58,0.0 +gfx950,256,7846,8192,512,ck,0,0,83.1236,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.8,1645.26,0.0 +gfx950,256,1334,7168,4608,ck,0,0,83.1308,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1060.07,701.32,0.0 +gfx950,256,7895,8192,512,ck,0,0,83.1662,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.33,1654.38,0.0 +gfx950,256,7815,8192,512,ck,0,0,83.1792,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.14,1637.87,0.0 +gfx950,256,7852,8192,512,ck,0,0,83.203,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.65,1644.91,0.0 +gfx950,256,7812,8192,512,ck,0,0,83.2033,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.61,1636.78,0.0 +gfx950,256,7784,8192,512,ck,0,0,83.287,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.0,1629.46,0.0 +gfx950,256,7818,8192,512,ck,0,0,83.2988,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.31,1636.12,0.0 +gfx950,256,1333,7168,4608,ck,0,0,83.3127,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1056.96,699.56,0.0 +gfx950,256,7859,8192,512,ck,0,0,83.4024,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.46,1642.4,0.0 +gfx950,256,7824,8192,512,ck,0,0,83.4224,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.75,1634.92,0.0 +gfx950,256,7860,8192,512,ck,0,0,83.4418,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.19,1641.83,0.0 +gfx950,256,7816,8192,512,ck,0,0,83.4902,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.31,1631.97,0.0 +gfx950,256,7767,8192,512,ck,0,0,83.5154,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.15,1621.56,0.0 +gfx950,256,7817,8192,512,ck,0,0,83.5272,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.06,1631.45,0.0 +gfx950,256,7834,8192,512,ck,0,0,83.554,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.51,1634.36,0.0 +gfx950,256,7858,8192,512,ck,0,0,83.5648,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.82,1639.0,0.0 +gfx950,256,7792,8192,512,ck,0,0,83.6111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.76,1624.76,0.0 +gfx950,256,7823,8192,512,ck,0,0,83.6344,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.65,1630.57,0.0 +gfx950,256,4854,6144,1536,ck,0,0,83.635,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1095.43,915.15,0.0 +gfx950,256,7842,8192,512,ck,0,0,83.7052,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.89,1633.03,0.0 +gfx950,256,7840,8192,512,ck,0,0,83.7102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.65,1632.52,0.0 +gfx950,256,7893,8192,512,ck,0,0,83.7194,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.87,1643.04,0.0 +gfx950,256,7789,8192,512,ck,0,0,83.72,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.45,1622.04,0.0 +gfx950,256,7894,8192,512,ck,0,0,83.7456,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.72,1642.73,0.0 +gfx950,256,7845,8192,512,ck,0,0,83.7886,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.41,1632.01,0.0 +gfx950,256,7819,8192,512,ck,0,0,83.8082,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.63,1626.38,0.0 +gfx950,256,7877,8192,512,ck,0,0,83.9604,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.0,1635.11,0.0 +gfx950,256,7685,8192,512,ck,0,0,84.0006,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.45,1595.7,0.0 +gfx950,256,7851,8192,512,ck,0,0,84.0058,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.98,1628.99,0.0 +gfx950,256,7926,8192,512,ck,0,0,84.023,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.31,1643.74,0.0 +gfx950,256,7913,8192,512,ck,0,0,84.034,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.91,1640.91,0.0 +gfx950,256,7809,8192,512,ck,0,0,84.0548,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.33,1619.6,0.0 +gfx950,256,3617,8192,1536,ck,0,0,84.0917,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1082.45,920.42,0.0 +gfx950,256,7901,8192,512,ck,0,0,84.3474,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.78,1632.41,0.0 +gfx950,256,7889,8192,512,ck,0,0,84.7579,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.79,1622.11,0.0 +gfx950,256,8005,8192,512,ck,0,0,85.282,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.4,1635.13,0.0 +gfx950,256,8064,8192,512,ck,0,0,85.2832,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.19,1646.79,0.0 +gfx950,256,8041,8192,512,ck,0,0,85.3796,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.03,1640.38,0.0 +gfx950,256,7989,8192,512,ck,0,0,85.4448,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.33,1628.85,0.0 +gfx950,256,7975,8192,512,ck,0,0,85.4466,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.94,1626.04,0.0 +gfx950,256,8060,8192,512,ck,0,0,85.5378,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.44,1641.1,0.0 +gfx950,256,7950,8192,512,ck,0,0,85.6062,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.03,1618.08,0.0 +gfx950,256,1485,7168,4608,ck,0,0,85.6611,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1145.21,714.0,0.0 +gfx950,256,7949,8192,512,ck,0,0,85.7178,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.91,1615.77,0.0 +gfx950,256,7972,8192,512,ck,0,0,85.722,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.13,1620.23,0.0 +gfx950,256,8058,8192,512,ck,0,0,85.742,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,788.36,1636.8,0.0 +gfx950,256,8013,8192,512,ck,0,0,85.782,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.59,1627.17,0.0 +gfx950,256,7984,8192,512,ck,0,0,85.8744,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.91,1619.71,0.0 +gfx950,256,8042,8192,512,ck,0,0,85.89,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.44,1630.83,0.0 +gfx950,256,7981,8192,512,ck,0,0,85.9244,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.17,1618.18,0.0 +gfx950,256,3620,8192,1536,ck,0,0,85.9409,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1060.03,901.24,0.0 +gfx950,256,7956,8192,512,ck,0,0,85.968,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.33,1612.45,0.0 +gfx950,256,7954,8192,512,ck,0,0,85.9714,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.11,1611.99,0.0 +gfx950,256,7983,8192,512,ck,0,0,85.9716,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.93,1617.69,0.0 +gfx950,256,7962,8192,512,ck,0,0,85.991,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.71,1613.2,0.0 +gfx950,256,7946,8192,512,ck,0,0,85.9993,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.07,1609.9,0.0 +gfx950,256,8063,8192,512,ck,0,0,86.0132,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,786.36,1632.62,0.0 +gfx950,256,7947,8192,512,ck,0,0,86.0177,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.01,1609.75,0.0 +gfx950,256,5061,6144,1536,ck,0,0,86.0622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1109.93,922.59,0.0 +gfx950,256,7980,8192,512,ck,0,0,86.0646,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.8,1615.35,0.0 +gfx950,256,5033,6144,1536,ck,0,0,86.0678,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1103.72,918.04,0.0 +gfx950,256,7988,8192,512,ck,0,0,86.0728,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.51,1616.77,0.0 +gfx950,256,7958,8192,512,ck,0,0,86.0794,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.52,1610.75,0.0 +gfx950,256,8059,8192,512,ck,0,0,86.104,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.14,1630.11,0.0 +gfx950,256,7963,8192,512,ck,0,0,86.1046,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.78,1611.26,0.0 +gfx950,256,5005,6144,1536,ck,0,0,86.123,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1096.88,912.95,0.0 +gfx950,256,7976,8192,512,ck,0,0,86.1364,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.76,1613.22,0.0 +gfx950,256,7942,8192,512,ck,0,0,86.1415,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.41,1606.45,0.0 +gfx950,256,7968,8192,512,ck,0,0,86.1431,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.92,1611.52,0.0 +gfx950,256,8044,8192,512,ck,0,0,86.1464,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,783.29,1626.37,0.0 +gfx950,256,7961,8192,512,ck,0,0,86.1548,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.14,1609.93,0.0 +gfx950,256,7985,8192,512,ck,0,0,86.163,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.4,1614.48,0.0 +gfx950,256,7997,8192,512,ck,0,0,86.1674,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.53,1616.76,0.0 +gfx950,256,3632,8192,1536,ck,0,0,86.1739,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1060.67,901.3,0.0 +gfx950,256,7960,8192,512,ck,0,0,86.1751,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.86,1609.36,0.0 +gfx950,256,7948,8192,512,ck,0,0,86.1786,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.66,1606.94,0.0 +gfx950,256,5012,6144,1536,ck,0,0,86.183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1097.64,913.44,0.0 +gfx950,256,7967,8192,512,ck,0,0,86.1896,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.41,1610.46,0.0 +gfx950,256,7959,8192,512,ck,0,0,86.2034,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.5,1608.63,0.0 +gfx950,256,8012,8192,512,ck,0,0,86.2044,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,779.65,1619.0,0.0 +gfx950,256,7951,8192,512,ck,0,0,86.2225,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.55,1606.71,0.0 +gfx950,256,1517,7168,4608,ck,0,0,86.2279,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1162.19,716.34,0.0 +gfx950,256,7996,8192,512,ck,0,0,86.25,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.68,1615.01,0.0 +gfx950,256,5030,6144,1536,ck,0,0,86.2578,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1100.63,915.53,0.0 +gfx950,256,7943,8192,512,ck,0,0,86.2664,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.38,1604.32,0.0 +gfx950,256,7952,8192,512,ck,0,0,86.2755,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.18,1605.92,0.0 +gfx950,256,8006,8192,512,ck,0,0,86.2925,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.27,1616.17,0.0 +gfx950,256,7978,8192,512,ck,0,0,86.2996,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.49,1610.56,0.0 +gfx950,256,7955,8192,512,ck,0,0,86.3048,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.21,1605.96,0.0 +gfx950,256,8053,8192,512,ck,0,0,86.3076,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.71,1625.09,0.0 +gfx950,256,5001,6144,1536,ck,0,0,86.3097,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1093.63,910.34,0.0 +gfx950,256,8000,8192,512,ck,0,0,86.3101,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.53,1614.67,0.0 +gfx950,256,8052,8192,512,ck,0,0,86.3301,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.4,1624.47,0.0 +gfx950,256,5027,6144,1536,ck,0,0,86.3334,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1099.01,914.25,0.0 +gfx950,256,7953,8192,512,ck,0,0,86.3417,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.68,1604.88,0.0 +gfx950,256,8057,8192,512,ck,0,0,86.3498,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,782.71,1625.08,0.0 +gfx950,256,3655,8192,1536,ck,0,0,86.3614,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1065.07,904.11,0.0 +gfx950,256,7992,8192,512,ck,0,0,86.3622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.29,1612.13,0.0 +gfx950,256,7945,8192,512,ck,0,0,86.3843,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.52,1602.53,0.0 +gfx950,256,8002,8192,512,ck,0,0,86.4136,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.79,1613.13,0.0 +gfx950,256,7970,8192,512,ck,0,0,86.4302,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.54,1606.56,0.0 +gfx950,256,7986,8192,512,ck,0,0,86.4477,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.94,1609.36,0.0 +gfx950,256,5018,6144,1536,ck,0,0,86.4594,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1095.45,911.48,0.0 +gfx950,256,7966,8192,512,ck,0,0,86.4714,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.78,1605.01,0.0 +gfx950,256,8054,8192,512,ck,0,0,86.4766,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,781.27,1622.11,0.0 +gfx950,256,7973,8192,512,ck,0,0,86.4968,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.24,1605.91,0.0 +gfx950,256,7999,8192,512,ck,0,0,86.5196,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.55,1610.56,0.0 +gfx950,256,7991,8192,512,ck,0,0,86.5493,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.51,1608.45,0.0 +gfx950,256,8004,8192,512,ck,0,0,86.5518,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.75,1610.94,0.0 +gfx950,256,1490,7168,4608,ck,0,0,86.554,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1137.21,707.73,0.0 +gfx950,256,8049,8192,512,ck,0,0,86.5586,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.05,1619.6,0.0 +gfx950,256,7965,8192,512,ck,0,0,86.5635,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.86,1603.11,0.0 +gfx950,256,8037,8192,512,ck,0,0,86.5865,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,778.63,1616.74,0.0 +gfx950,256,5060,6144,1536,ck,0,0,86.5978,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1102.85,916.73,0.0 +gfx950,256,7957,8192,512,ck,0,0,86.6118,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.66,1600.66,0.0 +gfx950,256,8031,8192,512,ck,0,0,86.6392,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.58,1614.58,0.0 +gfx950,256,7998,8192,512,ck,0,0,86.659,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.21,1607.78,0.0 +gfx950,256,8061,8192,512,ck,0,0,86.662,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.28,1620.01,0.0 +gfx950,256,5014,6144,1536,ck,0,0,86.6922,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1091.63,908.39,0.0 +gfx950,256,4936,6144,1536,ck,0,0,86.7046,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1074.5,895.83,0.0 +gfx950,256,8552,8192,512,ck,0,0,86.7076,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.37,1714.83,0.0 +gfx950,256,1553,7168,4096,ck,0,0,86.7394,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1051.34,668.5,0.0 +gfx950,256,5059,6144,1536,ck,0,0,86.7406,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1100.82,915.06,0.0 +gfx950,256,8008,8192,512,ck,0,0,86.7744,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.15,1607.59,0.0 +gfx950,256,5007,6144,1536,ck,0,0,86.7958,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1088.81,906.2,0.0 +gfx950,256,8050,8192,512,ck,0,0,86.8486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.54,1614.39,0.0 +gfx950,256,8460,8192,512,ck,0,0,86.855,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.08,1694.02,0.0 +gfx950,256,8045,8192,512,ck,0,0,86.8661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.9,1613.09,0.0 +gfx950,256,8055,8192,512,ck,0,0,86.878,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,777.76,1614.81,0.0 +gfx950,256,8027,8192,512,ck,0,0,86.8974,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.88,1609.01,0.0 +gfx950,256,8007,8192,512,ck,0,0,86.9106,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.84,1604.87,0.0 +gfx950,256,5003,6144,1536,ck,0,0,86.9246,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1086.33,904.22,0.0 +gfx950,256,8023,8192,512,ck,0,0,86.9645,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.9,1606.99,0.0 +gfx950,256,7964,8192,512,ck,0,0,86.9863,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.02,1595.13,0.0 +gfx950,256,8683,8192,512,ck,0,0,87.0,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.22,1734.51,0.0 +gfx950,256,8546,8192,512,ck,0,0,87.0032,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.98,1707.84,0.0 +gfx950,256,8015,8192,512,ck,0,0,87.0234,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.6,1604.35,0.0 +gfx950,256,8504,8192,512,ck,0,0,87.026,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.72,1699.24,0.0 +gfx950,256,8024,8192,512,ck,0,0,87.0408,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.32,1605.77,0.0 +gfx950,256,4898,6144,1536,ck,0,0,87.049,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1062.01,886.25,0.0 +gfx950,256,8039,8192,512,ck,0,0,87.0564,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,774.62,1608.4,0.0 +gfx950,256,7994,8192,512,ck,0,0,87.0682,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.18,1599.45,0.0 +gfx950,256,5062,6144,1536,ck,0,0,87.085,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1097.11,911.92,0.0 +gfx950,256,8048,8192,512,ck,0,0,87.0966,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,775.13,1609.4,0.0 +gfx950,256,8557,8192,512,ck,0,0,87.1143,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.99,1707.8,0.0 +gfx950,256,8025,8192,512,ck,0,0,87.1164,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.74,1604.57,0.0 +gfx950,256,8021,8192,512,ck,0,0,87.1276,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,772.26,1603.59,0.0 +gfx950,256,8471,8192,512,ck,0,0,87.1282,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.58,1690.85,0.0 +gfx950,256,8595,8192,512,ck,0,0,87.1387,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.42,1714.69,0.0 +gfx950,256,8543,8192,512,ck,0,0,87.1576,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.23,1704.23,0.0 +gfx950,256,8032,8192,512,ck,0,0,87.1636,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,773.0,1605.06,0.0 +gfx950,256,8686,8192,512,ck,0,0,87.1709,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.87,1731.69,0.0 +gfx950,256,8678,8192,512,ck,0,0,87.1753,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.06,1730.05,0.0 +gfx950,256,8458,8192,512,ck,0,0,87.2014,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.64,1686.91,0.0 +gfx950,256,8514,8192,512,ck,0,0,87.2271,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.79,1697.26,0.0 +gfx950,256,8487,8192,512,ck,0,0,87.2476,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.0,1691.63,0.0 +gfx950,256,8608,8192,512,ck,0,0,87.2579,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.54,1714.86,0.0 +gfx950,256,8029,8192,512,ck,0,0,87.2615,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.84,1602.68,0.0 +gfx950,256,8537,8192,512,ck,0,0,87.2691,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.61,1700.89,0.0 +gfx950,256,8593,8192,512,ck,0,0,87.282,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.87,1711.48,0.0 +gfx950,256,8534,8192,512,ck,0,0,87.2883,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.14,1699.94,0.0 +gfx950,256,8020,8192,512,ck,0,0,87.3174,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,770.48,1599.91,0.0 +gfx950,256,8634,8192,512,ck,0,0,87.3192,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.45,1718.69,0.0 +gfx950,256,8010,8192,512,ck,0,0,87.3268,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.44,1597.81,0.0 +gfx950,256,5051,6144,1536,ck,0,0,87.3458,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1091.46,907.45,0.0 +gfx950,256,8602,8192,512,ck,0,0,87.3543,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.05,1711.81,0.0 +gfx950,256,3749,8192,1536,ck,0,0,87.3745,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1079.8,912.91,0.0 +gfx950,256,8001,8192,512,ck,0,0,87.3812,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.1,1595.07,0.0 +gfx950,256,8480,8192,512,ck,0,0,87.3833,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,814.06,1687.65,0.0 +gfx950,256,8585,8192,512,ck,0,0,87.3857,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.12,1707.9,0.0 +gfx950,256,8448,8192,512,ck,0,0,87.3862,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.96,1681.41,0.0 +gfx950,256,8508,8192,512,ck,0,0,87.4018,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.58,1692.71,0.0 +gfx950,256,5006,6144,1536,ck,0,0,87.4218,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1080.8,899.55,0.0 +gfx950,256,8536,8192,512,ck,0,0,87.4385,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.92,1697.41,0.0 +gfx950,256,8016,8192,512,ck,0,0,87.4398,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.02,1596.9,0.0 +gfx950,256,8026,8192,512,ck,0,0,87.4467,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.92,1598.71,0.0 +gfx950,256,1601,7168,4096,ck,0,0,87.4472,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1075.06,673.2,0.0 +gfx950,256,8018,8192,512,ck,0,0,87.4571,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.06,1596.97,0.0 +gfx950,256,8554,8192,512,ck,0,0,87.4793,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.26,1700.09,0.0 +gfx950,256,5004,6144,1536,ck,0,0,87.507,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1079.31,898.36,0.0 +gfx950,256,8338,8192,512,ck,0,0,87.5118,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.25,1657.76,0.0 +gfx950,256,7690,8192,512,ck,0,0,87.5286,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,737.0,1532.35,0.0 +gfx950,256,8474,8192,512,ck,0,0,87.541,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.02,1683.45,0.0 +gfx950,256,8034,8192,512,ck,0,0,87.5455,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.82,1598.45,0.0 +gfx950,256,8661,8192,512,ck,0,0,87.5896,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.48,1718.59,0.0 +gfx950,256,8431,8192,512,ck,0,0,87.5915,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.43,1674.19,0.0 +gfx950,256,5280,6144,1536,ck,0,0,87.6018,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1137.61,940.94,0.0 +gfx950,256,8361,8192,512,ck,0,0,87.6063,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.59,1660.4,0.0 +gfx950,256,5000,6144,1536,ck,0,0,87.641,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1076.8,896.35,0.0 +gfx950,256,8689,8192,512,ck,0,0,87.6459,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.63,1722.88,0.0 +gfx950,256,5028,6144,1536,ck,0,0,87.6518,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1082.7,900.66,0.0 +gfx950,256,8470,8192,512,ck,0,0,87.654,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.59,1680.51,0.0 +gfx950,256,8555,8192,512,ck,0,0,87.6631,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.64,1696.72,0.0 +gfx950,256,8522,8192,512,ck,0,0,87.6678,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.44,1690.27,0.0 +gfx950,256,8411,8192,512,ck,0,0,87.67,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.8,1668.83,0.0 +gfx950,256,8036,8192,512,ck,0,0,87.6752,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.87,1596.47,0.0 +gfx950,256,8550,8192,512,ck,0,0,87.6752,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.05,1695.52,0.0 +gfx950,256,8335,8192,512,ck,0,0,87.6882,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.36,1653.84,0.0 +gfx950,256,8614,8192,512,ck,0,0,87.6987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.95,1707.4,0.0 +gfx950,256,8485,8192,512,ck,0,0,87.6988,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,811.61,1682.54,0.0 +gfx950,256,8677,8192,512,ck,0,0,87.7208,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.77,1719.1,0.0 +gfx950,256,8695,8192,512,ck,0,0,87.7216,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.48,1722.55,0.0 +gfx950,256,8455,8192,512,ck,0,0,87.7446,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.32,1675.89,0.0 +gfx950,256,8475,8192,512,ck,0,0,87.768,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.02,1679.29,0.0 +gfx950,256,7990,8192,512,ck,0,0,87.8008,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.38,1585.33,0.0 +gfx950,256,8051,8192,512,ck,0,0,87.8039,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.18,1597.01,0.0 +gfx950,256,5348,6144,1536,ck,0,0,87.8102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1149.53,949.41,0.0 +gfx950,256,8028,8192,512,ck,0,0,87.8198,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.84,1592.3,0.0 +gfx950,256,8353,8192,512,ck,0,0,87.8277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.81,1654.68,0.0 +gfx950,256,8482,8192,512,ck,0,0,87.8367,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.05,1679.32,0.0 +gfx950,256,5334,6144,1536,ck,0,0,87.8382,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1146.15,946.9,0.0 +gfx950,256,8627,8192,512,ck,0,0,87.868,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.6,1706.61,0.0 +gfx950,256,8047,8192,512,ck,0,0,87.8712,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,768.21,1595.02,0.0 +gfx950,256,8636,8192,512,ck,0,0,87.8739,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.41,1708.22,0.0 +gfx950,256,8366,8192,512,ck,0,0,87.8828,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.55,1656.14,0.0 +gfx950,256,1548,7168,4096,ck,0,0,87.8916,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1034.22,658.68,0.0 +gfx950,256,8469,8192,512,ck,0,0,87.9021,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.21,1675.57,0.0 +gfx950,256,8573,8192,512,ck,0,0,87.9479,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.71,1694.68,0.0 +gfx950,256,8421,8192,512,ck,0,0,87.9513,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.18,1665.42,0.0 +gfx950,256,8641,8192,512,ck,0,0,87.9517,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.16,1707.67,0.0 +gfx950,256,5002,6144,1536,ck,0,0,87.9522,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1073.42,893.49,0.0 +gfx950,256,8649,8192,512,ck,0,0,87.9883,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.58,1708.5,0.0 +gfx950,256,8549,8192,512,ck,0,0,88.004,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,814.9,1688.99,0.0 +gfx950,256,8434,8192,512,ck,0,0,88.0121,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,803.86,1666.76,0.0 +gfx950,256,8690,8192,512,ck,0,0,88.0149,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.23,1715.85,0.0 +gfx950,256,8571,8192,512,ck,0,0,88.0167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.88,1692.97,0.0 +gfx950,256,8442,8192,512,ck,0,0,88.0245,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,804.51,1668.06,0.0 +gfx950,256,3840,8192,1536,ck,0,0,88.0253,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1097.83,924.69,0.0 +gfx950,256,8498,8192,512,ck,0,0,88.0318,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,809.78,1678.67,0.0 +gfx950,256,8638,8192,512,ck,0,0,88.0459,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.99,1705.27,0.0 +gfx950,256,8413,8192,512,ck,0,0,88.0603,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.42,1661.82,0.0 +gfx950,256,8600,8192,512,ck,0,0,88.0612,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.23,1697.68,0.0 +gfx950,256,8680,8192,512,ck,0,0,88.0737,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.73,1712.79,0.0 +gfx950,256,5259,6144,1536,ck,0,0,88.0754,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1126.99,932.58,0.0 +gfx950,256,8547,8192,512,ck,0,0,88.1076,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.75,1686.62,0.0 +gfx950,256,8631,8192,512,ck,0,0,88.1159,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.67,1702.57,0.0 +gfx950,256,8419,8192,512,ck,0,0,88.1167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.48,1661.91,0.0 +gfx950,256,8643,8192,512,ck,0,0,88.1303,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.68,1704.59,0.0 +gfx950,256,8542,8192,512,ck,0,0,88.1359,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.01,1685.12,0.0 +gfx950,256,8545,8192,512,ck,0,0,88.1482,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.18,1685.46,0.0 +gfx950,256,8635,8192,512,ck,0,0,88.152,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.71,1702.64,0.0 +gfx950,256,8702,8192,512,ck,0,0,88.1564,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.05,1715.4,0.0 +gfx950,256,8519,8192,512,ck,0,0,88.1602,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.6,1680.25,0.0 +gfx950,256,1639,7168,4096,ck,0,0,88.1622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1091.65,675.69,0.0 +gfx950,256,5268,6144,1536,ck,0,0,88.1782,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1127.6,932.91,0.0 +gfx950,256,8439,8192,512,ck,0,0,88.179,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.82,1664.56,0.0 +gfx950,256,5332,6144,1536,ck,0,0,88.1878,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1141.18,942.84,0.0 +gfx950,256,8351,8192,512,ck,0,0,88.1985,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.27,1647.34,0.0 +gfx950,256,5331,6144,1536,ck,0,0,88.2022,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1140.78,942.53,0.0 +gfx950,256,8559,8192,512,ck,0,0,88.2036,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,814.0,1687.09,0.0 +gfx950,256,8558,8192,512,ck,0,0,88.2071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.88,1686.83,0.0 +gfx950,256,5373,6144,1536,ck,0,0,88.2205,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1149.53,948.91,0.0 +gfx950,256,8603,8192,512,ck,0,0,88.2256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.98,1695.09,0.0 +gfx950,256,8606,8192,512,ck,0,0,88.2436,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.1,1695.32,0.0 +gfx950,256,8347,8192,512,ck,0,0,88.2448,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.47,1645.71,0.0 +gfx950,256,5369,6144,1536,ck,0,0,88.2462,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1148.34,948.01,0.0 +gfx950,256,8346,8192,512,ck,0,0,88.2505,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.32,1645.41,0.0 +gfx950,256,3831,8192,1536,ck,0,0,88.2521,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1092.44,920.48,0.0 +gfx950,256,8364,8192,512,ck,0,0,88.257,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.98,1648.74,0.0 +gfx950,256,5252,6144,1536,ck,0,0,88.2746,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1122.95,929.38,0.0 +gfx950,256,8354,8192,512,ck,0,0,88.2799,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.82,1646.39,0.0 +gfx950,256,8441,8192,512,ck,0,0,88.2888,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.01,1662.88,0.0 +gfx950,256,8646,8192,512,ck,0,0,88.2939,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.44,1702.01,0.0 +gfx950,256,5339,6144,1536,ck,0,0,88.2958,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1141.28,942.78,0.0 +gfx950,256,8017,8192,512,ck,0,0,88.3001,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.62,1581.53,0.0 +gfx950,256,5284,6144,1536,ck,0,0,88.3107,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1129.33,934.01,0.0 +gfx950,256,8672,8192,512,ck,0,0,88.3119,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.74,1706.64,0.0 +gfx950,256,8544,8192,512,ck,0,0,88.3287,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,811.43,1681.83,0.0 +gfx950,256,8511,8192,512,ck,0,0,88.3457,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.14,1675.19,0.0 +gfx950,256,8348,8192,512,ck,0,0,88.3481,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.64,1643.98,0.0 +gfx950,256,5366,6144,1536,ck,0,0,88.3579,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1146.25,946.34,0.0 +gfx950,256,8416,8192,512,ck,0,0,88.3743,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,798.86,1656.49,0.0 +gfx950,256,5367,6144,1536,ck,0,0,88.3782,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1146.2,946.28,0.0 +gfx950,256,5358,6144,1536,ck,0,0,88.3795,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1144.26,944.86,0.0 +gfx950,256,8394,8192,512,ck,0,0,88.3871,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.65,1652.04,0.0 +gfx950,256,8393,8192,512,ck,0,0,88.3932,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.5,1651.74,0.0 +gfx950,256,8337,8192,512,ck,0,0,88.4077,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.06,1640.76,0.0 +gfx950,256,8377,8192,512,ck,0,0,88.4114,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.82,1648.34,0.0 +gfx950,256,8572,8192,512,ck,0,0,88.4384,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.08,1685.09,0.0 +gfx950,256,8528,8192,512,ck,0,0,88.4534,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.77,1676.4,0.0 +gfx950,256,8503,8192,512,ck,0,0,88.4724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.22,1671.27,0.0 +gfx950,256,5126,6144,1536,ck,0,0,88.4746,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1093.53,907.59,0.0 +gfx950,256,8343,8192,512,ck,0,0,88.4879,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.91,1640.42,0.0 +gfx950,256,3587,8192,1536,ck,0,0,88.5037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1019.96,868.46,0.0 +gfx950,256,8398,8192,512,ck,0,0,88.5393,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.66,1649.97,0.0 +gfx950,256,8476,8192,512,ck,0,0,88.5913,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.58,1663.87,0.0 +gfx950,256,5279,6144,1536,ck,0,0,88.5934,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1124.66,930.25,0.0 +gfx950,256,3824,8192,1536,ck,0,0,88.6124,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1086.01,915.32,0.0 +gfx950,256,8407,8192,512,ck,0,0,88.6128,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,795.86,1650.31,0.0 +gfx950,256,8506,8192,512,ck,0,0,88.6233,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.13,1668.99,0.0 +gfx950,256,8613,8192,512,ck,0,0,88.6444,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.07,1688.99,0.0 +gfx950,256,8699,8192,512,ck,0,0,88.6632,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.03,1705.02,0.0 +gfx950,256,1590,7168,4096,ck,0,0,88.6728,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1052.92,661.61,0.0 +gfx950,256,8381,8192,512,ck,0,0,88.6744,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,792.84,1644.21,0.0 +gfx950,256,5251,6144,1536,ck,0,0,88.7014,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1117.34,924.75,0.0 +gfx950,256,8388,8192,512,ck,0,0,88.7154,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,793.14,1644.79,0.0 +gfx950,256,8404,8192,512,ck,0,0,88.7304,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,794.52,1647.56,0.0 +gfx950,256,8430,8192,512,ck,0,0,88.7324,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,796.96,1652.47,0.0 +gfx950,256,1623,7168,4096,ck,0,0,88.7389,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1073.97,667.97,0.0 +gfx950,256,8622,8192,512,ck,0,0,88.7427,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.01,1688.83,0.0 +gfx950,256,8336,8192,512,ck,0,0,88.7551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.87,1634.15,0.0 +gfx950,256,5271,6144,1536,ck,0,0,88.7738,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1120.68,927.11,0.0 +gfx950,256,8446,8192,512,ck,0,0,88.7908,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.95,1654.43,0.0 +gfx950,256,5274,6144,1536,ck,0,0,88.8214,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1120.71,927.08,0.0 +gfx950,256,8339,8192,512,ck,0,0,88.8353,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.44,1633.25,0.0 +gfx950,256,8365,8192,512,ck,0,0,88.8512,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,789.76,1637.9,0.0 +gfx950,256,8597,8192,512,ck,0,0,88.8516,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,811.66,1682.01,0.0 +gfx950,256,8590,8192,512,ck,0,0,88.8775,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.76,1680.19,0.0 +gfx950,256,8340,8192,512,ck,0,0,88.8779,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.16,1632.65,0.0 +gfx950,256,8473,8192,512,ck,0,0,88.8961,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.55,1657.6,0.0 +gfx950,256,1599,7168,4096,ck,0,0,88.8986,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1056.19,661.8,0.0 +gfx950,256,5264,6144,1536,ck,0,0,88.901,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1117.59,924.7,0.0 +gfx950,256,5275,6144,1536,ck,0,0,88.903,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1119.9,926.39,0.0 +gfx950,256,8594,8192,512,ck,0,0,88.9167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.78,1680.21,0.0 +gfx950,256,1580,7168,4096,ck,0,0,88.9863,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1042.61,657.21,0.0 +gfx950,256,5290,6144,1536,ck,0,0,88.9907,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1121.98,927.81,0.0 +gfx950,256,5341,6144,1536,ck,0,0,88.9962,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1132.72,935.67,0.0 +gfx950,256,5258,6144,1536,ck,0,0,89.0102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1114.94,922.63,0.0 +gfx950,256,5375,6144,1536,ck,0,0,89.0244,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1139.57,940.65,0.0 +gfx950,256,8341,8192,512,ck,0,0,89.0353,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,785.86,1629.96,0.0 +gfx950,256,8696,8192,512,ck,0,0,89.0404,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.26,1697.23,0.0 +gfx950,256,3925,8192,1536,ck,0,0,89.0837,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1108.8,930.8,0.0 +gfx950,256,5281,6144,1536,ck,0,0,89.111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1118.55,925.16,0.0 +gfx950,256,5328,6144,1536,ck,0,0,89.1462,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1128.06,932.08,0.0 +gfx950,256,5273,6144,1536,ck,0,0,89.1719,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1116.1,923.29,0.0 +gfx950,256,1548,7168,4608,ck,0,0,89.2248,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1146.11,698.86,0.0 +gfx950,256,5292,6144,1536,ck,0,0,89.2263,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1119.44,925.67,0.0 +gfx950,256,8502,8192,512,ck,0,0,89.2353,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,799.23,1656.79,0.0 +gfx950,256,5283,6144,1536,ck,0,0,89.2446,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1117.3,924.08,0.0 +gfx950,256,5303,6144,1536,ck,0,0,89.2931,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1120.92,926.68,0.0 +gfx950,256,5285,6144,1536,ck,0,0,89.297,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1117.07,923.85,0.0 +gfx950,256,5124,6144,1536,ck,0,0,89.3264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1082.68,898.63,0.0 +gfx950,256,1599,7168,4608,ck,0,0,89.3279,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1182.5,708.87,0.0 +gfx950,256,1580,7168,4608,ck,0,0,89.4024,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1167.48,704.25,0.0 +gfx950,256,8767,8192,512,ck,0,0,89.4861,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.84,1702.18,0.0 +gfx950,256,5276,6144,1536,ck,0,0,89.5035,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1112.6,920.33,0.0 +gfx950,256,5368,6144,1536,ck,0,0,89.5462,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1131.46,934.09,0.0 +gfx950,256,7941,8192,512,ck,18,0,89.547,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,743.9,1545.17,0.0 +gfx950,256,5315,6144,1536,ck,0,0,89.5562,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1120.16,925.81,0.0 +gfx950,256,1639,7168,4608,ck,0,0,89.559,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1208.96,715.5,0.0 +gfx950,256,5302,6144,1536,ck,0,0,89.5627,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1117.34,923.73,0.0 +gfx950,256,8942,8192,512,ck,0,0,89.6048,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.13,1732.92,0.0 +gfx950,256,8342,8192,512,ck,0,0,89.6135,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,780.88,1619.63,0.0 +gfx950,256,5295,6144,1536,ck,0,0,89.6722,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1114.5,921.53,0.0 +gfx950,256,8852,8192,512,ck,0,0,89.7424,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.43,1713.32,0.0 +gfx950,256,8782,8192,512,ck,0,0,89.762,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.71,1699.77,0.0 +gfx950,256,8592,8192,512,ck,0,0,89.7683,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.9,1663.89,0.0 +gfx950,256,5269,6144,1536,ck,0,0,89.8495,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1106.84,915.71,0.0 +gfx950,256,8732,8192,512,ck,0,0,89.854,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.2,1688.63,0.0 +gfx950,256,3894,8192,1536,ck,0,0,89.8785,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1090.31,916.39,0.0 +gfx950,256,8807,8192,512,ck,0,0,89.8864,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.91,1702.12,0.0 +gfx950,256,5272,6144,1536,ck,0,0,89.8892,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1106.98,915.76,0.0 +gfx950,256,8804,8192,512,ck,0,0,89.8981,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.52,1701.33,0.0 +gfx950,256,8731,8192,512,ck,0,0,89.9176,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,814.53,1687.25,0.0 +gfx950,256,8879,8192,512,ck,0,0,89.9257,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.27,1714.9,0.0 +gfx950,256,4000,8192,1536,ck,0,0,89.9474,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1119.14,936.8,0.0 +gfx950,256,8737,8192,512,ck,0,0,89.9833,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,814.5,1687.14,0.0 +gfx950,256,8944,8192,512,ck,0,0,89.9921,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.71,1725.84,0.0 +gfx950,256,8216,8192,512,ck,0,0,90.0013,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.78,1589.0,0.0 +gfx950,256,1553,7168,4608,ck,0,0,90.0316,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1139.51,693.65,0.0 +gfx950,256,5277,6144,1536,ck,0,0,90.0487,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1106.07,914.91,0.0 +gfx950,256,8208,8192,512,ck,0,0,90.0624,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.51,1586.42,0.0 +gfx950,256,8733,8192,512,ck,0,0,90.0736,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.31,1684.7,0.0 +gfx950,256,8826,8192,512,ck,0,0,90.0901,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.82,1701.83,0.0 +gfx950,256,8811,8192,512,ck,0,0,90.1596,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.79,1697.71,0.0 +gfx950,256,8785,8192,512,ck,0,0,90.1773,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.21,1692.51,0.0 +gfx950,256,8204,8192,512,ck,0,0,90.1844,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.1,1583.52,0.0 +gfx950,256,8583,8192,512,ck,0,0,90.2385,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,797.88,1653.54,0.0 +gfx950,256,1601,7168,4608,ck,0,0,90.2456,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1171.94,702.08,0.0 +gfx950,256,8810,8192,512,ck,0,0,90.2613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.77,1695.61,0.0 +gfx950,256,8894,8192,512,ck,0,0,90.2724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.48,1711.12,0.0 +gfx950,256,8746,8192,512,ck,0,0,90.2808,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.65,1683.27,0.0 +gfx950,256,8789,8192,512,ck,0,0,90.292,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.54,1691.1,0.0 +gfx950,256,8754,8192,512,ck,0,0,90.2924,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.29,1684.55,0.0 +gfx950,256,8950,8192,512,ck,0,0,90.312,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.32,1720.85,0.0 +gfx950,256,8451,8192,512,ck,0,0,90.3252,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,784.85,1627.26,0.0 +gfx950,256,8891,8192,512,ck,0,0,90.328,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.69,1709.51,0.0 +gfx950,256,8795,8192,512,ck,0,0,90.3353,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.71,1691.42,0.0 +gfx950,256,1590,7168,4608,ck,0,0,90.3476,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1162.57,698.98,0.0 +gfx950,256,8779,8192,512,ck,0,0,90.3476,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.11,1688.19,0.0 +gfx950,256,8727,8192,512,ck,0,0,90.3478,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.28,1678.47,0.0 +gfx950,256,8802,8192,512,ck,0,0,90.3497,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.23,1692.46,0.0 +gfx950,256,8851,8192,512,ck,0,0,90.3632,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.66,1701.37,0.0 +gfx950,256,8906,8192,512,ck,0,0,90.442,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.04,1710.16,0.0 +gfx950,256,8302,8192,512,ck,0,0,90.4665,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.81,1596.89,0.0 +gfx950,256,8893,8192,512,ck,0,0,90.4677,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.6,1707.24,0.0 +gfx950,256,8808,8192,512,ck,0,0,90.474,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.66,1691.25,0.0 +gfx950,256,8822,8192,512,ck,0,0,90.4776,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.93,1693.8,0.0 +gfx950,256,8870,8192,512,ck,0,0,90.4813,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.35,1702.69,0.0 +gfx950,256,8201,8192,512,ck,0,0,90.4826,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.31,1577.74,0.0 +gfx950,256,8757,8192,512,ck,0,0,90.5068,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,811.64,1681.12,0.0 +gfx950,256,8245,8192,512,ck,0,0,90.5189,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.08,1585.32,0.0 +gfx950,256,8819,8192,512,ck,0,0,90.5208,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.26,1692.43,0.0 +gfx950,256,8907,8192,512,ck,0,0,90.5263,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.37,1708.75,0.0 +gfx950,256,8772,8192,512,ck,0,0,90.5265,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.85,1683.55,0.0 +gfx950,256,8714,8192,512,ck,0,0,90.5328,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.42,1672.61,0.0 +gfx950,256,8916,8192,512,ck,0,0,90.5721,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.78,1709.57,0.0 +gfx950,256,8250,8192,512,ck,0,0,90.5727,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.09,1585.32,0.0 +gfx950,256,8202,8192,512,ck,0,0,90.5778,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.61,1576.27,0.0 +gfx950,256,8988,8192,512,ck,0,0,90.5788,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.39,1722.87,0.0 +gfx950,256,8092,8192,512,ck,18,0,90.6027,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,749.21,1555.33,0.0 +gfx950,256,8234,8192,512,ck,0,0,90.6486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.97,1581.01,0.0 +gfx950,256,8741,8192,512,ck,0,0,90.665,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.74,1675.2,0.0 +gfx950,256,8861,8192,512,ck,0,0,90.6929,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.6,1697.04,0.0 +gfx950,256,8222,8192,512,ck,0,0,90.7041,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.4,1577.8,0.0 +gfx950,256,8842,8192,512,ck,0,0,90.7088,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.69,1693.21,0.0 +gfx950,256,8875,8192,512,ck,0,0,90.71,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.74,1699.33,0.0 +gfx950,256,8280,8192,512,ck,0,0,90.7139,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,765.68,1588.44,0.0 +gfx950,256,8787,8192,512,ck,0,0,90.7213,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.5,1682.73,0.0 +gfx950,256,8196,8192,512,ck,0,0,90.7342,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.74,1572.44,0.0 +gfx950,256,8243,8192,512,ck,0,0,90.7397,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.04,1581.09,0.0 +gfx950,256,8293,8192,512,ck,0,0,90.7426,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,766.64,1590.35,0.0 +gfx950,256,8256,8192,512,ck,0,0,90.746,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.19,1583.41,0.0 +gfx950,256,9032,8192,512,ck,0,0,90.7689,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.71,1727.45,0.0 +gfx950,256,8759,8192,512,ck,0,0,90.7709,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,809.46,1676.6,0.0 +gfx950,256,9088,8192,512,ck,0,0,90.7804,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.78,1737.66,0.0 +gfx950,256,8210,8192,512,ck,0,0,90.787,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.59,1574.13,0.0 +gfx950,256,8769,8192,512,ck,0,0,90.8037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.1,1677.85,0.0 +gfx950,256,8247,8192,512,ck,0,0,90.8277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.67,1580.31,0.0 +gfx950,256,8198,8192,512,ck,0,0,90.8363,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.07,1571.04,0.0 +gfx950,256,9070,8192,512,ck,0,0,90.8369,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.6,1733.23,0.0 +gfx950,256,8209,8192,512,ck,0,0,90.8485,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.99,1572.88,0.0 +gfx950,256,8900,8192,512,ck,0,0,90.8572,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.71,1701.23,0.0 +gfx950,256,8259,8192,512,ck,0,0,90.8799,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.34,1581.63,0.0 +gfx950,256,8205,8192,512,ck,0,0,90.8847,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.32,1571.51,0.0 +gfx950,256,8285,8192,512,ck,0,0,90.8883,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.67,1586.32,0.0 +gfx950,256,8199,8192,512,ck,0,0,90.8944,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.68,1570.22,0.0 +gfx950,256,8943,8192,512,ck,0,0,90.8993,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.3,1708.43,0.0 +gfx950,256,8215,8192,512,ck,0,0,90.9105,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.02,1572.92,0.0 +gfx950,256,8239,8192,512,ck,0,0,90.9157,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,760.2,1577.29,0.0 +gfx950,256,8866,8192,512,ck,0,0,90.9256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.96,1693.63,0.0 +gfx950,256,9013,8192,512,ck,0,0,90.9277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.5,1720.91,0.0 +gfx950,256,8936,8192,512,ck,0,0,90.9332,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.35,1706.49,0.0 +gfx950,256,8862,8192,512,ck,0,0,90.9424,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.44,1692.57,0.0 +gfx950,256,8292,8192,512,ck,0,0,90.9537,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.77,1586.48,0.0 +gfx950,256,8792,8192,512,ck,0,0,90.9616,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.81,1679.21,0.0 +gfx950,256,8946,8192,512,ck,0,0,90.9644,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.99,1707.77,0.0 +gfx950,256,9018,8192,512,ck,0,0,90.9661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.61,1721.11,0.0 +gfx950,256,8964,8192,512,ck,0,0,90.9688,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.61,1711.03,0.0 +gfx950,256,8930,8192,512,ck,0,0,90.9704,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.46,1704.68,0.0 +gfx950,256,8806,8192,512,ck,0,0,90.9761,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,811.97,1681.55,0.0 +gfx950,256,1623,7168,4608,ck,0,0,90.9911,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1178.31,700.91,0.0 +gfx950,256,8236,8192,512,ck,0,0,91.0036,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,759.19,1575.21,0.0 +gfx950,256,9058,8192,512,ck,0,0,91.004,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.95,1727.82,0.0 +gfx950,256,9023,8192,512,ck,0,0,91.0278,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.51,1720.87,0.0 +gfx950,256,8099,8192,512,ck,18,0,91.0439,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,746.23,1549.09,0.0 +gfx950,256,8941,8192,512,ck,0,0,91.0608,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.65,1705.03,0.0 +gfx950,256,8203,8192,512,ck,0,0,91.0623,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.66,1568.07,0.0 +gfx950,256,8127,8192,512,ck,18,0,91.0675,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,748.61,1553.88,0.0 +gfx950,256,8982,8192,512,ck,0,0,91.0717,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.33,1712.43,0.0 +gfx950,256,8788,8192,512,ck,0,0,91.0732,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,809.45,1676.41,0.0 +gfx950,256,8106,8192,512,ck,18,0,91.0807,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,746.57,1549.76,0.0 +gfx950,256,8115,8192,512,ck,18,0,91.0893,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,747.33,1551.28,0.0 +gfx950,256,9028,8192,512,ck,0,0,91.0941,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.36,1720.54,0.0 +gfx950,256,8270,8192,512,ck,0,0,91.1105,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.42,1579.67,0.0 +gfx950,256,8903,8192,512,ck,0,0,91.1144,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.67,1696.98,0.0 +gfx950,256,8304,8192,512,ck,0,0,91.1237,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,764.44,1585.74,0.0 +gfx950,256,8945,8192,512,ck,0,0,91.1325,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.37,1704.43,0.0 +gfx950,256,8837,8192,512,ck,0,0,91.134,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.42,1684.38,0.0 +gfx950,256,8860,8192,512,ck,0,0,91.1361,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.52,1688.6,0.0 +gfx950,256,8219,8192,512,ck,0,0,91.1469,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.43,1569.58,0.0 +gfx950,256,8108,8192,512,ck,18,0,91.1521,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,746.17,1548.92,0.0 +gfx950,256,8126,8192,512,ck,18,0,91.1541,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,747.81,1552.22,0.0 +gfx950,256,8125,8192,512,ck,18,0,91.1703,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,747.58,1551.76,0.0 +gfx950,256,8899,8192,512,ck,0,0,91.1816,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.7,1694.99,0.0 +gfx950,256,8244,8192,512,ck,0,0,91.1859,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.4,1573.54,0.0 +gfx950,256,8986,8192,512,ck,0,0,91.1929,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.6,1710.9,0.0 +gfx950,256,8295,8192,512,ck,0,0,91.2111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,762.88,1582.56,0.0 +gfx950,256,9111,8192,512,ck,0,0,91.2212,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.84,1733.52,0.0 +gfx950,256,9077,8192,512,ck,0,0,91.2213,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.71,1727.22,0.0 +gfx950,256,8114,8192,512,ck,18,0,91.2257,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,746.12,1548.78,0.0 +gfx950,256,9010,8192,512,ck,0,0,91.2429,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.35,1714.4,0.0 +gfx950,256,8255,8192,512,ck,0,0,91.2487,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.89,1574.5,0.0 +gfx950,256,8089,8192,512,ck,18,0,91.2639,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,743.51,1543.5,0.0 +gfx950,256,4046,8192,1536,ck,0,0,91.2704,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1115.6,932.25,0.0 +gfx950,256,8760,8192,512,ck,0,0,91.2769,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,805.07,1667.49,0.0 +gfx950,256,8784,8192,512,ck,0,0,91.2805,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.24,1671.87,0.0 +gfx950,256,8079,8192,512,ck,18,0,91.2951,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,742.34,1541.12,0.0 +gfx950,256,8253,8192,512,ck,0,0,91.2961,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.31,1573.31,0.0 +gfx950,256,8094,8192,512,ck,18,0,91.2969,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,743.7,1543.87,0.0 +gfx950,256,1685,7168,4096,ck,14,0,91.297,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1083.76,661.77,0.0 +gfx950,256,8843,8192,512,ck,0,0,91.2997,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.49,1682.43,0.0 +gfx950,256,8734,8192,512,ck,0,0,91.3124,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.37,1662.03,0.0 +gfx950,256,4002,8192,1536,ck,0,0,91.3143,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1102.93,923.17,0.0 +gfx950,256,8069,8192,512,ck,18,0,91.3195,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,741.22,1538.86,0.0 +gfx950,256,8967,8192,512,ck,0,0,91.3233,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.67,1704.94,0.0 +gfx950,256,8995,8192,512,ck,0,0,91.3423,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.07,1709.76,0.0 +gfx950,256,8817,8192,512,ck,0,0,91.3428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,809.72,1676.83,0.0 +gfx950,256,9200,8192,512,ck,0,0,91.3513,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,844.82,1747.51,0.0 +gfx950,256,8066,8192,512,ck,18,0,91.3595,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,740.62,1537.63,0.0 +gfx950,256,8939,8192,512,ck,0,0,91.3645,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.73,1698.99,0.0 +gfx950,256,8297,8192,512,ck,0,0,91.3881,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.59,1579.86,0.0 +gfx950,256,8993,8192,512,ck,0,0,91.4001,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.37,1708.31,0.0 +gfx950,256,8864,8192,512,ck,0,0,91.4056,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.48,1684.37,0.0 +gfx950,256,9039,8192,512,ck,0,0,91.4117,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.48,1716.6,0.0 +gfx950,256,8853,8192,512,ck,0,0,91.4153,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,812.38,1682.15,0.0 +gfx950,256,8102,8192,512,ck,18,0,91.4167,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,743.46,1543.33,0.0 +gfx950,256,8736,8192,512,ck,0,0,91.4299,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,801.52,1660.26,0.0 +gfx950,256,8077,8192,512,ck,18,0,91.4329,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,741.03,1538.43,0.0 +gfx950,256,8319,8192,512,ck,0,0,91.4369,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,763.2,1583.08,0.0 +gfx950,256,8096,8192,512,ck,18,0,91.4379,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,742.74,1541.86,0.0 +gfx950,256,8937,8192,512,ck,0,0,91.4416,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.86,1697.19,0.0 +gfx950,256,8084,8192,512,ck,18,0,91.4659,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,741.41,1539.17,0.0 +gfx950,256,8119,8192,512,ck,18,0,91.4755,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,744.54,1545.47,0.0 +gfx950,256,8729,8192,512,ck,0,0,91.4757,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.48,1658.14,0.0 +gfx950,256,8871,8192,512,ck,0,0,91.5045,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.24,1683.84,0.0 +gfx950,256,8207,8192,512,ck,0,0,91.5173,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.27,1561.01,0.0 +gfx950,256,9082,8192,512,ck,0,0,91.5253,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.4,1722.41,0.0 +gfx950,256,8952,8192,512,ck,0,0,91.5256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.48,1698.4,0.0 +gfx950,256,9029,8192,512,ck,0,0,91.5297,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.5,1712.54,0.0 +gfx950,256,8873,8192,512,ck,0,0,91.5428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.09,1683.5,0.0 +gfx950,256,8095,8192,512,ck,18,0,91.5499,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,741.74,1539.79,0.0 +gfx950,256,8107,8192,512,ck,18,0,91.5737,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,742.64,1541.6,0.0 +gfx950,256,8087,8192,512,ck,18,0,91.5749,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,740.8,1537.89,0.0 +gfx950,256,8262,8192,512,ck,0,0,91.5885,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.72,1569.95,0.0 +gfx950,256,8103,8192,512,ck,18,0,91.5899,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,742.14,1540.59,0.0 +gfx950,256,8313,8192,512,ck,0,0,91.6005,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,761.29,1579.15,0.0 +gfx950,256,8100,8192,512,ck,18,0,91.6037,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,741.76,1539.81,0.0 +gfx950,256,8097,8192,512,ck,18,0,91.6063,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,741.46,1539.21,0.0 +gfx950,256,8109,8192,512,ck,18,0,91.6203,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,742.45,1541.19,0.0 +gfx950,256,8076,8192,512,ck,18,0,91.6271,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,739.37,1534.99,0.0 +gfx950,256,8123,8192,512,ck,18,0,91.6331,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,743.62,1543.55,0.0 +gfx950,256,9208,8192,512,ck,0,0,91.6585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,842.72,1743.13,0.0 +gfx950,256,8289,8192,512,ck,0,0,91.6613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.59,1573.68,0.0 +gfx950,256,1710,7168,4096,ck,14,0,91.6675,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1095.39,664.13,0.0 +gfx950,256,9086,8192,512,ck,0,0,91.6729,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.42,1720.37,0.0 +gfx950,256,8091,8192,512,ck,18,0,91.6908,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,740.23,1536.68,0.0 +gfx950,256,8075,8192,512,ck,18,0,91.6935,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,738.74,1533.69,0.0 +gfx950,256,8118,8192,512,ck,18,0,91.6975,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,742.65,1541.55,0.0 +gfx950,256,8078,8192,512,ck,18,0,91.7047,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,738.93,1534.06,0.0 +gfx950,256,8080,8192,512,ck,18,0,91.7197,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,738.99,1534.17,0.0 +gfx950,256,8116,8192,512,ck,18,0,91.7207,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,742.27,1540.79,0.0 +gfx950,256,8240,8192,512,ck,0,0,91.7215,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,753.61,1563.62,0.0 +gfx950,256,8951,8192,512,ck,0,0,91.7256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.6,1694.51,0.0 +gfx950,256,9134,8192,512,ck,0,0,91.7389,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.21,1727.97,0.0 +gfx950,256,8071,8192,512,ck,18,0,91.7431,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,737.98,1532.13,0.0 +gfx950,256,8881,8192,512,ck,0,0,91.7517,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,811.97,1681.14,0.0 +gfx950,256,9078,8192,512,ck,0,0,91.7521,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.97,1717.41,0.0 +gfx950,256,8085,8192,512,ck,18,0,91.7551,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,739.16,1534.5,0.0 +gfx950,256,8120,8192,512,ck,18,0,91.7637,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,742.29,1540.8,0.0 +gfx950,256,8113,8192,512,ck,18,0,91.7931,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,741.41,1539.02,0.0 +gfx950,256,8121,8192,512,ck,18,0,91.7977,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,742.11,1540.42,0.0 +gfx950,256,9079,8192,512,ck,0,0,91.8033,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.6,1716.64,0.0 +gfx950,256,8068,8192,512,ck,18,0,91.8047,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,737.21,1530.55,0.0 +gfx950,256,9012,8192,512,ck,0,0,91.8137,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.39,1704.11,0.0 +gfx950,256,8082,8192,512,ck,18,0,91.8191,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,738.37,1532.88,0.0 +gfx950,256,3971,8192,1536,ck,0,0,91.8233,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1088.32,912.0,0.0 +gfx950,256,8975,8192,512,ck,0,0,91.828,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.88,1697.04,0.0 +gfx950,256,8940,8192,512,ck,0,0,91.8324,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.64,1690.52,0.0 +gfx950,256,9036,8192,512,ck,0,0,91.8538,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.22,1707.79,0.0 +gfx950,256,9052,8192,512,ck,0,0,91.8698,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.54,1710.43,0.0 +gfx950,256,8310,8192,512,ck,0,0,91.886,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,758.65,1573.69,0.0 +gfx950,256,8277,8192,512,ck,0,0,91.8861,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.64,1567.62,0.0 +gfx950,256,9080,8192,512,ck,0,0,91.904,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.78,1714.94,0.0 +gfx950,256,8897,8192,512,ck,0,0,91.914,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,811.99,1681.12,0.0 +gfx950,256,8117,8192,512,ck,18,0,91.9155,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,740.79,1537.71,0.0 +gfx950,256,8065,8192,512,ck,18,0,91.9281,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,735.95,1527.94,0.0 +gfx950,256,9072,8192,512,ck,0,0,91.933,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.79,1712.93,0.0 +gfx950,256,8286,8192,512,ck,0,0,91.9334,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.07,1568.47,0.0 +gfx950,256,8308,8192,512,ck,0,0,91.9623,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.84,1572.02,0.0 +gfx950,256,8963,8192,512,ck,0,0,91.994,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.3,1691.78,0.0 +gfx950,256,4050,8192,1536,ck,0,0,91.9944,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1107.91,925.7,0.0 +gfx950,256,9177,8192,512,ck,0,0,91.9961,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.8,1731.04,0.0 +gfx950,256,8996,8192,512,ck,0,0,92.0194,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.09,1697.37,0.0 +gfx950,256,8311,8192,512,ck,0,0,92.0201,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,757.64,1571.58,0.0 +gfx950,256,8997,8192,512,ck,0,0,92.0203,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.17,1697.53,0.0 +gfx950,256,9160,8192,512,ck,0,0,92.0364,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.88,1727.16,0.0 +gfx950,256,8093,8192,512,ck,18,0,92.0367,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,737.63,1531.28,0.0 +gfx950,256,8994,8192,512,ck,0,0,92.0562,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.58,1696.32,0.0 +gfx950,256,9186,8192,512,ck,0,0,92.0734,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.92,1731.24,0.0 +gfx950,256,9130,8192,512,ck,0,0,92.0768,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.78,1720.9,0.0 +gfx950,256,8231,8192,512,ck,0,0,92.0795,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,749.86,1555.89,0.0 +gfx950,256,8070,8192,512,ck,14,0,92.0917,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,735.09,1526.14,0.0 +gfx950,256,1718,7168,4096,ck,14,0,92.1082,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1095.25,662.55,0.0 +gfx950,256,8112,8192,512,ck,14,0,92.1089,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,738.78,1533.56,0.0 +gfx950,256,8104,8192,512,ck,18,0,92.1095,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,738.05,1532.08,0.0 +gfx950,256,8074,8192,512,ck,18,0,92.1103,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,735.31,1526.57,0.0 +gfx950,256,9099,8192,512,ck,0,0,92.1256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.52,1714.3,0.0 +gfx950,256,9158,8192,512,ck,0,0,92.1265,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.88,1725.1,0.0 +gfx950,256,9162,8192,512,ck,0,0,92.1361,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.16,1725.66,0.0 +gfx950,256,9213,8192,512,ck,0,0,92.1492,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,838.69,1734.76,0.0 +gfx950,256,9083,8192,512,ck,0,0,92.1644,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.72,1710.65,0.0 +gfx950,256,9085,8192,512,ck,0,0,92.1689,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.86,1710.93,0.0 +gfx950,256,8271,8192,512,ck,0,0,92.1731,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,752.74,1561.64,0.0 +gfx950,256,9155,8192,512,ck,0,0,92.1784,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.14,1723.58,0.0 +gfx950,256,8979,8192,512,ck,0,0,92.1883,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.04,1691.14,0.0 +gfx950,256,8884,8192,512,ck,0,0,92.1883,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.39,1673.73,0.0 +gfx950,256,9198,8192,512,ck,0,0,92.2009,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.85,1731.04,0.0 +gfx950,256,8110,8192,512,ck,18,0,92.2089,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,737.8,1531.53,0.0 +gfx950,256,9120,8192,512,ck,0,0,92.2193,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.59,1716.41,0.0 +gfx950,256,8314,8192,512,ck,0,0,92.2237,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,756.24,1568.66,0.0 +gfx950,256,8111,8192,512,ck,18,0,92.2307,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,737.72,1531.35,0.0 +gfx950,256,9163,8192,512,ck,0,0,92.2328,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.38,1724.03,0.0 +gfx950,256,9185,8192,512,ck,0,0,92.2465,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.26,1727.81,0.0 +gfx950,256,8101,8192,512,ck,18,0,92.2563,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,736.6,1529.1,0.0 +gfx950,256,8086,8192,512,ck,18,0,92.2572,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,735.23,1526.33,0.0 +gfx950,256,9153,8192,512,ck,0,0,92.268,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.15,1721.54,0.0 +gfx950,256,9205,8192,512,ck,0,0,92.2852,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.72,1730.74,0.0 +gfx950,256,9204,8192,512,ck,0,0,92.2921,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.57,1730.43,0.0 +gfx950,256,9144,8192,512,ck,0,0,92.296,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.08,1719.37,0.0 +gfx950,256,8088,8192,512,ck,14,0,92.3259,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,734.86,1525.56,0.0 +gfx950,256,8083,8192,512,ck,18,0,92.3525,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,734.2,1524.21,0.0 +gfx950,256,8833,8192,512,ck,0,0,92.3865,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,802.03,1660.81,0.0 +gfx950,256,8081,8192,512,ck,18,0,92.4097,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,733.56,1522.9,0.0 +gfx950,256,9109,8192,512,ck,0,0,92.4277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.72,1710.53,0.0 +gfx950,256,8928,8192,512,ck,0,0,92.4464,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,810.13,1677.1,0.0 +gfx950,256,8072,8192,512,ck,18,0,92.4627,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,732.33,1520.38,0.0 +gfx950,256,8122,8192,512,ck,18,0,92.468,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,736.82,1529.43,0.0 +gfx950,256,8124,8192,512,ck,18,0,92.4695,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,736.99,1529.77,0.0 +gfx950,256,9127,8192,512,ck,0,0,92.4985,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.72,1712.5,0.0 +gfx950,256,8105,8192,512,ck,18,0,92.5275,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,734.8,1525.35,0.0 +gfx950,256,8178,8192,512,ck,14,0,92.5343,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,741.37,1538.56,0.0 +gfx950,256,9139,8192,512,ck,0,0,92.5373,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.46,1713.98,0.0 +gfx950,256,8175,8192,512,ck,14,0,92.5494,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,740.98,1537.76,0.0 +gfx950,256,9143,8192,512,ck,0,0,92.5873,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.38,1713.78,0.0 +gfx950,256,5430,6144,1536,ck,0,0,92.6157,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1106.59,912.39,0.0 +gfx950,256,9128,8192,512,ck,0,0,92.6169,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.75,1710.5,0.0 +gfx950,256,8090,8192,512,ck,18,0,92.6423,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,732.54,1520.72,0.0 +gfx950,256,8098,8192,512,ck,18,0,92.6569,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,733.15,1521.94,0.0 +gfx950,256,8073,8192,512,ck,18,0,92.6769,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,730.72,1517.05,0.0 +gfx950,256,9042,8192,512,ck,0,0,92.7013,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.22,1693.27,0.0 +gfx950,256,8177,8192,512,ck,14,0,92.7272,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,739.74,1535.18,0.0 +gfx950,256,9344,8192,512,ck,0,0,92.7391,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,845.2,1747.6,0.0 +gfx950,256,9035,8192,512,ck,0,0,92.7638,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.03,1690.85,0.0 +gfx950,256,8149,8192,512,ck,14,0,92.8023,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,736.61,1528.84,0.0 +gfx950,256,9071,8192,512,ck,0,0,92.8026,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.95,1696.7,0.0 +gfx950,256,9065,8192,512,ck,0,0,92.8183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.26,1695.32,0.0 +gfx950,256,8984,8192,512,ck,0,0,92.8192,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,811.94,1680.56,0.0 +gfx950,256,8067,8192,512,ck,18,0,92.8375,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,728.92,1513.34,0.0 +gfx950,256,9131,8192,512,ck,0,0,92.8428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.01,1706.88,0.0 +gfx950,256,9126,8192,512,ck,0,0,92.8891,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.15,1705.12,0.0 +gfx950,256,734,9216,7168,ck,0,0,92.8962,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1043.92,913.39,0.0 +gfx950,256,8176,8192,512,ck,14,0,92.9139,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,738.16,1531.91,0.0 +gfx950,256,9176,8192,512,ck,0,0,92.9592,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.04,1712.92,0.0 +gfx950,256,9136,8192,512,ck,0,0,92.9632,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.39,1705.58,0.0 +gfx950,256,9124,8192,512,ck,0,0,92.9661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.29,1703.35,0.0 +gfx950,256,8182,8192,512,ck,14,0,92.967,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,738.28,1532.13,0.0 +gfx950,256,8171,8192,512,ck,14,0,92.9963,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,737.05,1529.65,0.0 +gfx950,256,8183,8192,512,ck,14,0,93.0431,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,737.77,1531.06,0.0 +gfx950,256,8376,8192,512,ck,0,0,93.0468,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,755.14,1566.04,0.0 +gfx950,256,8132,8192,512,ck,14,0,93.0488,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,733.12,1521.7,0.0 +gfx950,256,8128,8192,512,ck,17,0,93.0675,a8w8_blockscale_1x128x128_256x64x128x256_16x16_32x32_2x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,732.61,1520.67,0.0 +gfx950,256,8962,8192,512,ck,0,0,93.074,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.73,1671.96,0.0 +gfx950,256,9093,8192,512,ck,0,0,93.0981,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.33,1695.3,0.0 +gfx950,256,8188,8192,512,ck,14,0,93.1334,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,737.5,1530.48,0.0 +gfx950,256,8180,8192,512,ck,14,0,93.1897,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,736.33,1528.1,0.0 +gfx950,256,8189,8192,512,ck,14,0,93.1926,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,737.12,1529.69,0.0 +gfx950,256,8169,8192,512,ck,14,0,93.2047,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,735.23,1525.86,0.0 +gfx950,256,9142,8192,512,ck,0,0,93.2465,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.43,1701.49,0.0 +gfx950,256,8145,8192,512,ck,14,0,93.2639,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,732.6,1520.55,0.0 +gfx950,256,8164,8192,512,ck,14,0,93.2789,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,734.19,1523.74,0.0 +gfx950,256,8160,8192,512,ck,14,0,93.2997,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,733.67,1522.68,0.0 +gfx950,256,8138,8192,512,ck,14,0,93.3659,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,731.17,1517.62,0.0 +gfx950,256,8158,8192,512,ck,14,0,93.3822,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,732.84,1520.97,0.0 +gfx950,256,9193,8192,512,ck,0,0,93.4004,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.65,1707.91,0.0 +gfx950,256,8130,8192,512,ck,14,0,93.4207,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,730.02,1515.28,0.0 +gfx950,256,9276,8192,512,ck,0,0,93.4268,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.87,1722.44,0.0 +gfx950,256,8159,8192,512,ck,14,0,93.4557,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,732.35,1519.96,0.0 +gfx950,256,8181,8192,512,ck,14,0,93.4851,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,734.1,1523.46,0.0 +gfx950,256,8162,8192,512,ck,14,0,93.5385,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,731.97,1519.15,0.0 +gfx950,256,8154,8192,512,ck,14,0,93.5535,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,731.14,1517.47,0.0 +gfx950,256,8179,8192,512,ck,14,0,93.5768,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,733.2,1521.6,0.0 +gfx950,256,9323,8192,512,ck,0,0,93.6331,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.25,1727.12,0.0 +gfx950,256,8147,8192,512,ck,14,0,93.6565,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,729.71,1514.53,0.0 +gfx950,256,8155,8192,512,ck,0,0,93.6773,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,730.26,1515.64,0.0 +gfx950,256,8143,8192,512,ck,14,0,93.6861,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,729.12,1513.33,0.0 +gfx950,256,8152,8192,512,ck,14,0,93.7227,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,729.64,1514.37,0.0 +gfx950,256,8136,8192,512,ck,14,0,93.7301,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,728.15,1511.36,0.0 +gfx950,256,8170,8192,512,ck,14,0,93.7347,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,731.16,1517.42,0.0 +gfx950,256,8185,8192,512,ck,14,0,93.7355,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,732.49,1520.11,0.0 +gfx950,256,8144,8192,512,ck,14,0,93.7511,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,728.7,1512.47,0.0 +gfx950,256,8150,8192,512,ck,14,0,93.7703,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,729.09,1513.24,0.0 +gfx950,256,8135,8192,512,ck,0,0,93.7953,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,727.56,1510.13,0.0 +gfx950,256,8172,8192,512,ck,14,0,93.8023,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,730.81,1516.68,0.0 +gfx950,256,8142,8192,512,ck,14,0,93.8063,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,728.1,1511.22,0.0 +gfx950,256,8146,8192,512,ck,14,0,93.8154,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,728.38,1511.79,0.0 +gfx950,256,8161,8192,512,ck,14,0,93.8255,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,729.65,1514.33,0.0 +gfx950,256,8167,8192,512,ck,14,0,93.8387,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,730.08,1515.2,0.0 +gfx950,256,9151,8192,512,ck,0,0,93.842,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.01,1692.31,0.0 +gfx950,256,8168,8192,512,ck,18,0,93.8867,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,729.8,1514.6,0.0 +gfx950,256,9051,8192,512,ck,0,0,93.8895,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.67,1673.46,0.0 +gfx950,256,8173,8192,512,ck,14,0,93.9046,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,730.1,1515.21,0.0 +gfx950,256,8174,8192,512,ck,14,0,93.9187,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,730.08,1515.16,0.0 +gfx950,256,5448,6144,1536,ck,0,0,93.9207,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1094.83,902.36,0.0 +gfx950,256,8153,8192,512,ck,14,0,93.9347,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,728.08,1511.13,0.0 +gfx950,256,8187,8192,512,ck,18,0,93.945,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,731.04,1517.08,0.0 +gfx950,256,9291,8192,512,ck,0,0,93.9489,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.58,1715.56,0.0 +gfx950,256,9341,8192,512,ck,0,0,93.9612,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.94,1724.33,0.0 +gfx950,256,9251,8192,512,ck,0,0,93.9672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.85,1708.03,0.0 +gfx950,256,8129,8192,512,ck,14,0,93.9981,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,725.45,1505.8,0.0 +gfx950,256,9271,8192,512,ck,0,0,94.0332,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.06,1710.43,0.0 +gfx950,256,8131,8192,512,ck,0,0,94.0593,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.16,1505.17,0.0 +gfx950,256,9339,8192,512,ck,0,0,94.071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.79,1721.96,0.0 +gfx950,256,8134,8192,512,ck,0,0,94.0747,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.31,1505.47,0.0 +gfx950,256,8133,8192,512,ck,0,0,94.0759,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.21,1505.27,0.0 +gfx950,256,8711,8192,512,ck,0,0,94.0849,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,776.67,1608.92,0.0 +gfx950,256,8166,8192,512,ck,14,0,94.0881,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,728.06,1511.0,0.0 +gfx950,256,9255,8192,512,ck,0,0,94.0952,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.09,1706.43,0.0 +gfx950,256,8184,8192,512,ck,0,0,94.1179,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,729.43,1513.75,0.0 +gfx950,256,8140,8192,512,ck,0,0,94.1379,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.35,1505.53,0.0 +gfx950,256,8156,8192,512,ck,14,0,94.1729,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,726.51,1507.84,0.0 +gfx950,256,5447,6144,1536,ck,0,0,94.1794,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1091.63,899.74,0.0 +gfx950,256,9317,8192,512,ck,0,0,94.1844,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.83,1715.94,0.0 +gfx950,256,9431,8192,512,ck,0,0,94.1891,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.94,1736.3,0.0 +gfx950,256,9247,8192,512,ck,0,0,94.1996,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.46,1703.1,0.0 +gfx950,256,8165,8192,512,ck,14,0,94.2242,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,726.91,1508.64,0.0 +gfx950,256,8141,8192,512,ck,14,0,94.2443,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,724.62,1504.01,0.0 +gfx950,256,8139,8192,512,ck,14,0,94.248,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,724.42,1503.6,0.0 +gfx950,256,8151,8192,512,ck,18,0,94.2557,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,725.43,1505.62,0.0 +gfx950,256,9453,8192,512,ck,0,0,94.2703,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,841.17,1738.75,0.0 +gfx950,256,8148,8192,512,ck,0,0,94.2766,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.0,1504.75,0.0 +gfx950,256,8137,8192,512,ck,0,0,94.287,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,723.94,1502.61,0.0 +gfx950,256,9249,8192,512,ck,0,0,94.3064,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.7,1701.53,0.0 +gfx950,256,9230,8192,512,ck,0,0,94.3269,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.84,1697.76,0.0 +gfx950,256,9324,8192,512,ck,0,0,94.3302,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.17,1714.54,0.0 +gfx950,256,8157,8192,512,ck,0,0,94.335,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.35,1505.43,0.0 +gfx950,256,9232,8192,512,ck,0,0,94.3592,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.73,1697.54,0.0 +gfx950,256,9378,8192,512,ck,0,0,94.3599,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.71,1723.67,0.0 +gfx950,256,9332,8192,512,ck,0,0,94.3608,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.61,1715.41,0.0 +gfx950,256,9238,8192,512,ck,0,0,94.3876,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.02,1698.1,0.0 +gfx950,256,5440,6144,1536,ck,0,0,94.4303,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1087.33,896.32,0.0 +gfx950,256,8163,8192,512,ck,0,0,94.431,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,725.15,1504.98,0.0 +gfx950,256,9471,8192,512,ck,0,0,94.4377,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,841.28,1738.89,0.0 +gfx950,256,9343,8192,512,ck,0,0,94.4579,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.73,1715.62,0.0 +gfx950,256,9235,8192,512,ck,0,0,94.4993,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.78,1695.56,0.0 +gfx950,256,806,9216,7168,ck,0,0,94.5102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1126.75,917.3,0.0 +gfx950,256,9379,8192,512,ck,0,0,94.5413,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.19,1720.54,0.0 +gfx950,256,9289,8192,512,ck,0,0,94.5444,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.18,1704.4,0.0 +gfx950,256,5431,6144,1536,ck,0,0,94.5663,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1083.97,893.72,0.0 +gfx950,256,8186,8192,512,ck,14,0,94.5709,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,726.11,1506.86,0.0 +gfx950,256,9333,8192,512,ck,0,0,94.5994,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.6,1711.27,0.0 +gfx950,256,8190,8192,512,ck,14,0,94.6541,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,725.83,1506.25,0.0 +gfx950,256,9298,8192,512,ck,0,0,94.6949,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.67,1703.29,0.0 +gfx950,256,9412,8192,512,ck,0,0,94.7213,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.54,1723.15,0.0 +gfx950,256,8685,8192,512,ck,0,0,94.732,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,769.06,1593.3,0.0 +gfx950,256,9233,8192,512,ck,0,0,94.7368,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.55,1690.95,0.0 +gfx950,256,9285,8192,512,ck,0,0,94.7417,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.11,1700.13,0.0 +gfx950,256,9409,8192,512,ck,0,0,94.7635,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.9,1721.85,0.0 +gfx950,256,9340,8192,512,ck,0,0,94.7822,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.63,1709.21,0.0 +gfx950,256,9390,8192,512,ck,0,0,94.8311,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.62,1717.24,0.0 +gfx950,256,9262,8192,512,ck,0,0,94.8329,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.29,1694.4,0.0 +gfx950,256,9293,8192,512,ck,0,0,94.837,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.99,1699.85,0.0 +gfx950,256,9277,8192,512,ck,0,0,94.8444,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.51,1696.87,0.0 +gfx950,256,9227,8192,512,ck,0,0,94.8969,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.64,1687.03,0.0 +gfx950,256,9429,8192,512,ck,0,0,94.9231,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.27,1722.52,0.0 +gfx950,256,9254,8192,512,ck,0,0,94.9257,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.78,1691.32,0.0 +gfx950,256,9257,8192,512,ck,0,0,94.9264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.04,1691.84,0.0 +gfx950,256,9314,8192,512,ck,0,0,94.9727,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.67,1701.16,0.0 +gfx950,256,9300,8192,512,ck,0,0,94.992,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.27,1698.32,0.0 +gfx950,256,9319,8192,512,ck,0,0,95.0088,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.8,1701.4,0.0 +gfx950,256,5381,6144,1536,ck,0,0,95.0098,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1068.97,882.27,0.0 +gfx950,256,9275,8192,512,ck,0,0,95.0142,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.87,1693.48,0.0 +gfx950,256,8191,8192,512,ck,18,0,95.0282,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,723.06,1500.5,0.0 +gfx950,256,9353,8192,512,ck,0,0,95.0304,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.62,1707.06,0.0 +gfx950,256,9432,8192,512,ck,0,0,95.0435,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.48,1720.87,0.0 +gfx950,256,9451,8192,512,ck,0,0,95.0671,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.95,1723.82,0.0 +gfx950,256,9330,8192,512,ck,0,0,95.0838,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.12,1702.01,0.0 +gfx950,256,9334,8192,512,ck,0,0,95.0942,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.39,1702.54,0.0 +gfx950,256,9335,8192,512,ck,0,0,95.1044,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.39,1702.53,0.0 +gfx950,256,9373,8192,512,ck,0,0,95.1595,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.26,1708.3,0.0 +gfx950,256,9426,8192,512,ck,0,0,95.1695,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.84,1717.53,0.0 +gfx950,256,9313,8192,512,ck,0,0,95.1757,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.83,1697.35,0.0 +gfx950,256,5489,6144,1536,ck,0,0,95.1807,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1088.47,896.37,0.0 +gfx950,256,9090,8192,512,ck,0,0,95.2183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,800.82,1657.02,0.0 +gfx950,256,9408,8192,512,ck,0,0,95.2601,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.47,1712.7,0.0 +gfx950,256,9292,8192,512,ck,0,0,95.288,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.01,1691.63,0.0 +gfx950,256,9296,8192,512,ck,0,0,95.302,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.25,1692.09,0.0 +gfx950,256,9398,8192,512,ck,0,0,95.3437,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.86,1709.43,0.0 +gfx950,256,9422,8192,512,ck,0,0,95.3514,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.91,1713.54,0.0 +gfx950,256,830,9216,7168,ck,0,0,95.363,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1149.92,915.54,0.0 +gfx950,256,9458,8192,512,ck,0,0,95.3825,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.8,1719.36,0.0 +gfx950,256,9457,8192,512,ck,0,0,95.3861,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.68,1719.12,0.0 +gfx950,256,9467,8192,512,ck,0,0,95.3885,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.54,1720.84,0.0 +gfx950,256,9434,8192,512,ck,0,0,95.3997,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.54,1714.8,0.0 +gfx950,256,9416,8192,512,ck,0,0,95.4311,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.69,1711.05,0.0 +gfx950,256,9258,8192,512,ck,0,0,95.4345,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,813.77,1683.01,0.0 +gfx950,256,5441,6144,1536,ck,0,0,95.4435,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1075.98,886.95,0.0 +gfx950,256,9615,8192,512,ck,0,0,95.4653,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,844.88,1745.65,0.0 +gfx950,256,9406,8192,512,ck,0,0,95.4747,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.43,1708.5,0.0 +gfx950,256,9338,8192,512,ck,0,0,95.4764,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.44,1696.43,0.0 +gfx950,256,9380,8192,512,ck,0,0,95.5433,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.55,1702.67,0.0 +gfx950,256,9695,8192,512,ck,0,0,95.5435,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,851.21,1758.37,0.0 +gfx950,256,9450,8192,512,ck,0,0,95.5459,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.68,1715.0,0.0 +gfx950,256,9228,8192,512,ck,0,0,95.5764,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,809.93,1675.21,0.0 +gfx950,256,9424,8192,512,ck,0,0,95.6421,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.56,1708.68,0.0 +gfx950,256,9386,8192,512,ck,0,0,95.6423,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.23,1701.97,0.0 +gfx950,256,9621,8192,512,ck,0,0,95.6452,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,843.81,1743.43,0.0 +gfx950,256,9547,8192,512,ck,0,0,95.6812,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.01,1729.71,0.0 +gfx950,256,9610,8192,512,ck,0,0,95.6839,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,842.51,1740.78,0.0 +gfx950,256,9372,8192,512,ck,0,0,95.7125,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.4,1698.25,0.0 +gfx950,256,9400,8192,512,ck,0,0,95.7168,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.81,1703.11,0.0 +gfx950,256,9381,8192,512,ck,0,0,95.7432,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.92,1699.29,0.0 +gfx950,256,9522,8192,512,ck,0,0,95.7523,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.2,1724.01,0.0 +gfx950,256,9421,8192,512,ck,0,0,95.7623,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.26,1706.01,0.0 +gfx950,256,9337,8192,512,ck,0,0,95.8288,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.34,1690.02,0.0 +gfx950,256,9469,8192,512,ck,0,0,95.8413,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.78,1713.07,0.0 +gfx950,256,9564,8192,512,ck,0,0,95.8632,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.91,1729.42,0.0 +gfx950,256,9482,8192,512,ck,0,0,95.8758,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.62,1714.74,0.0 +gfx950,256,9633,8192,512,ck,0,0,95.8883,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,842.72,1741.12,0.0 +gfx950,256,9245,8192,512,ck,0,0,95.8911,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,808.76,1672.71,0.0 +gfx950,256,9436,8192,512,ck,0,0,95.8994,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.4,1706.21,0.0 +gfx950,256,9466,8192,512,ck,0,0,95.9119,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.91,1711.28,0.0 +gfx950,256,9443,8192,512,ck,0,0,95.9199,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.83,1707.08,0.0 +gfx950,256,9611,8192,512,ck,0,0,95.9275,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,840.46,1736.54,0.0 +gfx950,256,9532,8192,512,ck,0,0,95.9705,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.17,1721.85,0.0 +gfx950,256,9691,8192,512,ck,0,0,96.0242,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,846.6,1748.87,0.0 +gfx950,256,4176,8192,1536,ck,0,0,96.0277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1094.4,910.33,0.0 +gfx950,256,9538,8192,512,ck,0,0,96.0403,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.09,1721.66,0.0 +gfx950,256,9480,8192,512,ck,0,0,96.0578,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.88,1711.14,0.0 +gfx950,256,9474,8192,512,ck,0,0,96.0627,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.31,1710.0,0.0 +gfx950,256,9515,8192,512,ck,0,0,96.0651,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.87,1717.17,0.0 +gfx950,256,4123,8192,1536,ck,0,0,96.0728,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1080.0,900.02,0.0 +gfx950,256,4294,8192,1536,ck,0,0,96.0788,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1124.72,931.85,0.0 +gfx950,256,9576,8192,512,ck,0,0,96.1153,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.76,1726.99,0.0 +gfx950,256,9648,8192,512,ck,0,0,96.1283,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,841.93,1739.41,0.0 +gfx950,256,9582,8192,512,ck,0,0,96.1539,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.95,1727.35,0.0 +gfx950,256,849,9216,7168,ck,0,0,96.1649,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1166.44,912.96,0.0 +gfx950,256,9693,8192,512,ck,0,0,96.1987,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,845.24,1746.04,0.0 +gfx950,256,9517,8192,512,ck,0,0,96.2237,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.67,1714.69,0.0 +gfx950,256,9592,8192,512,ck,0,0,96.2447,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.03,1727.48,0.0 +gfx950,256,9462,8192,512,ck,0,0,96.2489,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.66,1704.58,0.0 +gfx950,256,9719,8192,512,ck,0,0,96.2906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,846.7,1748.94,0.0 +gfx950,256,9597,8192,512,ck,0,0,96.2931,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.05,1727.49,0.0 +gfx950,256,9388,8192,512,ck,0,0,96.3352,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,817.48,1690.08,0.0 +gfx950,256,9481,8192,512,ck,0,0,96.3397,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.54,1706.31,0.0 +gfx950,256,9689,8192,512,ck,0,0,96.3399,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,843.65,1742.78,0.0 +gfx950,256,869,9216,7168,ck,0,0,96.3858,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1191.18,916.18,0.0 +gfx950,256,9572,8192,512,ck,0,0,96.3939,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.0,1721.3,0.0 +gfx950,256,9651,8192,512,ck,0,0,96.4095,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.74,1734.87,0.0 +gfx950,256,9650,8192,512,ck,0,0,96.4229,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.53,1734.45,0.0 +gfx950,256,814,9216,7168,ck,0,0,96.4329,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1115.24,901.13,0.0 +gfx950,256,9607,8192,512,ck,0,0,96.4444,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.6,1726.53,0.0 +gfx950,256,9624,8192,512,ck,0,0,96.4459,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.07,1729.48,0.0 +gfx950,256,9603,8192,512,ck,0,0,96.492,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.84,1724.98,0.0 +gfx950,256,9619,8192,512,ck,0,0,96.4967,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.19,1727.7,0.0 +gfx950,256,9594,8192,512,ck,0,0,96.515,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.86,1722.99,0.0 +gfx950,256,9712,8192,512,ck,0,0,96.5234,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,844.05,1743.5,0.0 +gfx950,256,9635,8192,512,ck,0,0,96.5355,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.25,1729.8,0.0 +gfx950,256,9445,8192,512,ck,0,0,96.5389,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.71,1696.49,0.0 +gfx950,256,9375,8192,512,ck,0,0,96.553,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,814.51,1683.99,0.0 +gfx950,256,9575,8192,512,ck,0,0,96.5703,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.74,1718.68,0.0 +gfx950,256,9673,8192,512,ck,0,0,96.5711,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,840.24,1735.81,0.0 +gfx950,256,9520,8192,512,ck,0,0,96.5715,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.95,1709.04,0.0 +gfx950,256,9589,8192,512,ck,0,0,96.5947,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.74,1720.7,0.0 +gfx950,256,9525,8192,512,ck,0,0,96.6054,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.09,1709.31,0.0 +gfx950,256,9528,8192,512,ck,0,0,96.6292,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.15,1709.41,0.0 +gfx950,256,9676,8192,512,ck,0,0,96.6339,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.96,1735.21,0.0 +gfx950,256,5540,6144,1536,ck,0,0,96.6736,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1081.62,889.82,0.0 +gfx950,256,9584,8192,512,ck,0,0,96.681,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.56,1718.29,0.0 +gfx950,256,9549,8192,512,ck,0,0,96.6929,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.43,1711.96,0.0 +gfx950,256,9677,8192,512,ck,0,0,96.6972,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.49,1734.25,0.0 +gfx950,256,9519,8192,512,ck,0,0,96.7103,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.67,1706.41,0.0 +gfx950,256,9628,8192,512,ck,0,0,96.7194,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.05,1725.29,0.0 +gfx950,256,9580,8192,512,ck,0,0,96.7276,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.82,1716.76,0.0 +gfx950,256,9679,8192,512,ck,0,0,96.7391,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.3,1733.85,0.0 +gfx950,256,9642,8192,512,ck,0,0,96.751,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.99,1727.17,0.0 +gfx950,256,9710,8192,512,ck,0,0,96.7705,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,841.72,1738.7,0.0 +gfx950,256,9669,8192,512,ck,0,0,96.7803,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,838.08,1731.36,0.0 +gfx950,256,9494,8192,512,ck,0,0,96.782,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.9,1700.78,0.0 +gfx950,256,809,9216,7168,ck,0,0,96.8026,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1104.16,896.37,0.0 +gfx950,256,9652,8192,512,ck,0,0,96.8123,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.33,1727.82,0.0 +gfx950,256,9545,8192,512,ck,0,0,96.8559,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.68,1708.38,0.0 +gfx950,256,4180,8192,1536,ck,0,0,96.8707,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1085.91,903.15,0.0 +gfx950,256,9537,8192,512,ck,0,0,96.8758,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.82,1706.63,0.0 +gfx950,256,9570,8192,512,ck,0,0,96.8864,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.59,1712.2,0.0 +gfx950,256,9484,8192,512,ck,0,0,96.9312,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.76,1696.42,0.0 +gfx950,256,856,9216,7168,ck,0,0,96.9486,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1166.55,907.43,0.0 +gfx950,256,9646,8192,512,ck,0,0,96.9581,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.55,1724.18,0.0 +gfx950,256,9555,8192,512,ck,0,0,96.9647,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.62,1708.2,0.0 +gfx950,256,9554,8192,512,ck,0,0,96.9752,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.45,1707.85,0.0 +gfx950,256,9727,8192,512,ck,0,0,97.0199,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,841.02,1737.19,0.0 +gfx950,256,9577,8192,512,ck,0,0,97.0571,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.74,1710.41,0.0 +gfx950,256,9724,8192,512,ck,0,0,97.0623,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,840.4,1735.91,0.0 +gfx950,256,9639,8192,512,ck,0,0,97.0824,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.88,1720.75,0.0 +gfx950,256,9641,8192,512,ck,0,0,97.1087,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.83,1720.64,0.0 +gfx950,256,9548,8192,512,ck,0,0,97.1102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.78,1704.43,0.0 +gfx950,256,9523,8192,512,ck,0,0,97.1196,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.54,1699.91,0.0 +gfx950,256,1808,7168,4096,ck,0,0,97.1262,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1093.08,645.4,0.0 +gfx950,256,9550,8192,512,ck,0,0,97.1431,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.67,1704.2,0.0 +gfx950,256,813,9216,7168,ck,0,0,97.155,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1105.59,894.17,0.0 +gfx950,256,9718,8192,512,ck,0,0,97.1587,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.04,1733.14,0.0 +gfx950,256,9513,8192,512,ck,0,0,97.1932,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.05,1696.89,0.0 +gfx950,256,9685,8192,512,ck,0,0,97.2075,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.78,1726.53,0.0 +gfx950,256,9690,8192,512,ck,0,0,97.2164,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.13,1727.25,0.0 +gfx950,256,9686,8192,512,ck,0,0,97.2691,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.33,1725.61,0.0 +gfx950,256,9687,8192,512,ck,0,0,97.3451,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.77,1724.44,0.0 +gfx950,256,9578,8192,512,ck,0,0,97.3886,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.01,1704.76,0.0 +gfx950,256,5922,6144,1536,ck,0,0,97.432,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1147.2,937.09,0.0 +gfx950,256,9477,8192,512,ck,0,0,97.5612,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,814.86,1684.25,0.0 +gfx950,256,5670,6144,1536,ck,0,0,97.7027,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1095.34,898.84,0.0 +gfx950,256,877,9216,7168,ck,0,0,97.785,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1184.94,905.16,0.0 +gfx950,256,9218,8192,512,ck,0,0,97.871,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,790.08,1634.21,0.0 +gfx950,256,1817,7168,4096,ck,0,0,98.0608,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1088.05,640.94,0.0 +gfx950,256,8987,8192,512,ck,0,0,98.2684,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,767.17,1587.88,0.0 +gfx950,256,5890,6144,1536,ck,0,0,98.3409,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1130.46,923.93,0.0 +gfx950,256,9822,8192,512,ck,0,0,98.4336,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.04,1728.54,0.0 +gfx950,256,9768,8192,512,ck,0,0,98.4498,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.3,1718.99,0.0 +gfx950,256,9766,8192,512,ck,0,0,98.508,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.64,1717.63,0.0 +gfx950,256,6008,6144,1536,ck,0,0,98.5328,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1150.86,938.69,0.0 +gfx950,256,9779,8192,512,ck,0,0,98.5542,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.36,1719.06,0.0 +gfx950,256,4507,8192,1536,ck,0,0,98.5765,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1150.6,946.96,0.0 +gfx950,256,9820,8192,512,ck,0,0,98.5832,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.6,1725.58,0.0 +gfx950,256,9814,8192,512,ck,0,0,98.6334,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.66,1723.67,0.0 +gfx950,256,9780,8192,512,ck,0,0,98.65,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.63,1717.56,0.0 +gfx950,256,9834,8192,512,ck,0,0,98.7146,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.68,1725.68,0.0 +gfx950,256,9800,8192,512,ck,0,0,98.7616,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.39,1719.04,0.0 +gfx950,256,9789,8192,512,ck,0,0,98.7726,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.37,1716.97,0.0 +gfx950,256,9851,8192,512,ck,0,0,98.784,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.53,1727.37,0.0 +gfx950,256,1685,7168,4608,ck,14,0,98.7878,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1126.77,657.48,0.0 +gfx950,256,1718,7168,4608,ck,14,0,98.8842,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1147.72,663.16,0.0 +gfx950,256,9939,8192,512,ck,0,0,98.8966,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,843.05,1740.44,0.0 +gfx950,256,9736,8192,512,ck,0,0,98.9015,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.79,1705.67,0.0 +gfx950,256,9781,8192,512,ck,0,0,98.9162,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.48,1713.11,0.0 +gfx950,256,9738,8192,512,ck,0,0,98.92,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.8,1705.7,0.0 +gfx950,256,9733,8192,512,ck,0,0,98.9275,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.31,1704.71,0.0 +gfx950,256,9773,8192,512,ck,0,0,98.9478,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.54,1711.19,0.0 +gfx950,256,9808,8192,512,ck,0,0,98.9496,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.49,1717.14,0.0 +gfx950,256,9784,8192,512,ck,0,0,98.9576,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.39,1712.9,0.0 +gfx950,256,9806,8192,512,ck,0,0,98.9654,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.19,1716.52,0.0 +gfx950,256,1821,7168,4096,ck,0,0,99.0182,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1079.9,635.49,0.0 +gfx950,256,9821,8192,512,ck,0,0,99.02,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.0,1718.14,0.0 +gfx950,256,9827,8192,512,ck,0,0,99.0356,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.38,1718.89,0.0 +gfx950,256,9795,8192,512,ck,0,0,99.052,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.53,1713.15,0.0 +gfx950,256,9757,8192,512,ck,0,0,99.0686,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.17,1706.38,0.0 +gfx950,256,9815,8192,512,ck,0,0,99.0996,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.82,1715.73,0.0 +gfx950,256,9823,8192,512,ck,0,0,99.1238,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.3,1716.68,0.0 +gfx950,256,9811,8192,512,ck,0,0,99.1372,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.17,1714.4,0.0 +gfx950,256,9809,8192,512,ck,0,0,99.1576,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.83,1713.71,0.0 +gfx950,256,9796,8192,512,ck,0,0,99.2162,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,828.24,1710.48,0.0 +gfx950,256,9886,8192,512,ck,0,0,99.2462,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.6,1725.29,0.0 +gfx950,256,1809,7168,4096,ck,0,0,99.2534,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1070.24,631.75,0.0 +gfx950,256,4522,8192,1536,ck,0,0,99.3024,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1145.99,942.75,0.0 +gfx950,256,9750,8192,512,ck,0,0,99.3122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.55,1701.0,0.0 +gfx950,256,9830,8192,512,ck,0,0,99.322,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.23,1714.44,0.0 +gfx950,256,9772,8192,512,ck,0,0,99.3558,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.05,1704.0,0.0 +gfx950,256,4504,8192,1536,ck,0,0,99.3637,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1140.73,938.92,0.0 +gfx950,256,16384,512,7168,ck,0,0,99.403,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1209.81,1387.16,0.0 +gfx950,256,9732,8192,512,ck,0,0,99.4204,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.14,1696.09,0.0 +gfx950,256,9840,8192,512,ck,0,0,99.4982,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,829.6,1713.11,0.0 +gfx950,256,9914,8192,512,ck,0,0,99.5089,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.75,1725.49,0.0 +gfx950,256,9745,8192,512,ck,0,0,99.5114,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,821.48,1696.75,0.0 +gfx950,256,9964,8192,512,ck,0,0,99.5212,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.86,1733.76,0.0 +gfx950,256,9763,8192,512,ck,0,0,99.5256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.88,1699.56,0.0 +gfx950,256,9919,8192,512,ck,0,0,99.5315,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.98,1725.94,0.0 +gfx950,256,9891,8192,512,ck,0,0,99.5526,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.45,1720.83,0.0 +gfx950,256,9347,8192,512,ck,0,0,99.559,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,787.56,1628.39,0.0 +gfx950,256,9923,8192,512,ck,0,0,99.5596,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.08,1726.14,0.0 +gfx950,256,9862,8192,512,ck,0,0,99.568,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.87,1715.64,0.0 +gfx950,256,1816,7168,4096,ck,0,0,99.6026,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1070.61,630.83,0.0 +gfx950,256,9906,8192,512,ck,0,0,99.6166,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,834.17,1722.26,0.0 +gfx950,256,9979,8192,512,ck,0,0,99.6472,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,840.06,1734.11,0.0 +gfx950,256,9868,8192,512,ck,0,0,99.6504,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.69,1715.24,0.0 +gfx950,256,9825,8192,512,ck,0,0,99.6672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.93,1707.66,0.0 +gfx950,256,9935,8192,512,ck,0,0,99.686,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.03,1725.98,0.0 +gfx950,256,9753,8192,512,ck,0,0,99.6928,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,820.66,1695.02,0.0 +gfx950,256,9876,8192,512,ck,0,0,99.6942,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.0,1715.84,0.0 +gfx950,256,9883,8192,512,ck,0,0,99.7158,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.41,1716.65,0.0 +gfx950,256,9871,8192,512,ck,0,0,99.7252,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.32,1714.46,0.0 +gfx950,256,9981,8192,512,ck,0,0,99.7298,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,839.54,1733.02,0.0 +gfx950,256,9775,8192,512,ck,0,0,99.7342,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.17,1698.04,0.0 +gfx950,256,9810,8192,512,ck,0,0,99.746,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.02,1703.77,0.0 +gfx950,256,6001,6144,1536,ck,0,0,99.7705,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1135.26,926.08,0.0 +gfx950,256,9743,8192,512,ck,0,0,99.7724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,819.17,1691.97,0.0 +gfx950,256,9968,8192,512,ck,0,0,99.7906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.93,1729.76,0.0 +gfx950,256,9976,8192,512,ck,0,0,99.8288,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,838.28,1730.45,0.0 +gfx950,256,9739,8192,512,ck,0,0,99.832,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.34,1690.28,0.0 +gfx950,256,1710,7168,4608,ck,14,0,99.8486,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1131.34,655.24,0.0 +gfx950,256,9812,8192,512,ck,0,0,99.8552,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.28,1702.24,0.0 +gfx950,256,9941,8192,512,ck,0,0,99.8582,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,835.1,1724.02,0.0 +gfx950,256,9918,8192,512,ck,0,0,99.8643,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.11,1720.02,0.0 +gfx950,256,9975,8192,512,ck,0,0,99.8715,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,837.84,1729.54,0.0 +gfx950,256,9892,8192,512,ck,0,0,99.8804,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.79,1715.35,0.0 +gfx950,256,4598,8192,1536,ck,0,0,99.8932,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1158.36,950.81,0.0 +gfx950,256,9917,8192,512,ck,0,0,99.9047,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.69,1719.16,0.0 +gfx950,256,9816,8192,512,ck,0,0,99.9286,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.01,1701.67,0.0 +gfx950,256,9926,8192,512,ck,0,0,99.9706,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,832.9,1719.55,0.0 +gfx950,256,9737,8192,512,ck,0,0,99.9794,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,816.97,1687.45,0.0 +gfx950,256,9969,8192,512,ck,0,0,99.9942,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,836.31,1726.41,0.0 +gfx950,256,9915,8192,512,ck,0,0,100.0014,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,831.72,1717.16,0.0 +gfx950,256,9731,8192,512,ck,0,0,100.044,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,815.94,1685.35,0.0 +gfx950,256,9864,8192,512,ck,0,0,100.063,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,826.93,1707.49,0.0 +gfx950,256,9946,8192,512,ck,0,0,100.0644,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.79,1721.31,0.0 +gfx950,256,9853,8192,512,ck,0,0,100.0774,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.89,1705.39,0.0 +gfx950,256,9948,8192,512,ck,0,0,100.094,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.72,1721.14,0.0 +gfx950,256,9872,8192,512,ck,0,0,100.0968,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,827.32,1708.26,0.0 +gfx950,256,9767,8192,512,ck,0,0,100.1244,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,818.3,1690.07,0.0 +gfx950,256,5753,6144,1536,ck,0,0,100.1684,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1084.02,888.17,0.0 +gfx950,256,9922,8192,512,ck,0,0,100.2632,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.13,1713.85,0.0 +gfx950,256,9967,8192,512,ck,0,0,100.3412,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,833.25,1720.1,0.0 +gfx950,256,9867,8192,512,ck,0,0,100.4224,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,824.22,1701.88,0.0 +gfx950,256,6002,6144,1536,ck,0,0,100.4397,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1127.88,920.04,0.0 +gfx950,256,9473,8192,512,ck,0,0,100.4519,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,791.08,1635.11,0.0 +gfx950,256,6026,6144,1536,ck,0,0,100.5948,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1130.64,921.92,0.0 +gfx950,256,6108,6144,1536,ck,0,0,100.6552,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1145.34,932.63,0.0 +gfx950,256,9982,8192,512,ck,0,0,100.8127,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,830.6,1714.57,0.0 +gfx950,256,9730,8192,512,ck,0,0,101.1462,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,806.96,1666.82,0.0 +gfx950,256,9963,8192,512,ck,0,0,101.2759,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.23,1703.56,0.0 +gfx950,256,10003,8192,512,ck,0,0,101.6144,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,825.78,1704.53,0.0 +gfx950,256,9990,8192,512,ck,0,0,101.7797,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,823.37,1699.61,0.0 +gfx950,256,9988,8192,512,ck,0,0,101.8784,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.41,1697.63,0.0 +gfx950,256,10005,8192,512,ck,0,0,101.9815,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,822.97,1698.73,0.0 +gfx950,256,6107,6144,1536,ck,0,0,102.0516,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1129.49,919.73,0.0 +gfx950,256,1991,7168,4096,ck,0,0,102.1175,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1144.88,646.88,0.0 +gfx950,256,2004,7168,4096,ck,0,0,102.3746,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1149.46,647.6,0.0 +gfx950,256,1968,7168,4096,ck,0,0,102.3774,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1128.78,641.1,0.0 +gfx950,256,9857,8192,512,ck,0,0,102.4124,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.39,1667.16,0.0 +gfx950,256,2000,7168,4096,ck,0,0,102.5051,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1145.7,646.06,0.0 +gfx950,256,1978,7168,4096,ck,0,0,102.5358,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1132.76,641.91,0.0 +gfx950,256,9312,8192,512,ck,18,0,102.5802,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,761.5,1574.67,0.0 +gfx950,256,2009,7168,4096,ck,0,0,103.105,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1144.16,643.91,0.0 +gfx950,256,1987,7168,4096,ck,0,0,103.1454,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1131.19,639.72,0.0 +gfx950,256,2011,7168,4096,ck,0,0,103.1946,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1144.31,643.71,0.0 +gfx950,256,1944,7168,4096,ck,0,0,103.2213,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1105.9,631.57,0.0 +gfx950,256,2003,7168,4096,ck,0,0,103.2954,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1138.64,641.65,0.0 +gfx950,256,2006,7168,4096,ck,0,0,103.3642,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1139.59,641.76,0.0 +gfx950,256,1934,7168,4096,ck,0,0,103.4107,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1098.19,628.64,0.0 +gfx950,256,9986,8192,512,ck,0,0,103.7672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,807.27,1666.4,0.0 +gfx950,256,1980,7168,4096,ck,0,0,103.8139,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1119.95,634.36,0.0 +gfx950,256,2037,7168,4096,ck,0,0,103.8355,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1151.95,644.35,0.0 +gfx950,256,2032,7168,4096,ck,0,0,103.9302,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1148.07,642.87,0.0 +gfx950,256,2035,7168,4096,ck,0,0,103.9345,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1149.72,643.38,0.0 +gfx950,256,2041,7168,4096,ck,0,0,103.9898,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1152.5,644.1,0.0 +gfx950,256,2002,7168,4096,ck,0,0,104.023,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1130.11,636.98,0.0 +gfx950,256,2012,7168,4096,ck,0,0,104.1378,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1134.51,638.05,0.0 +gfx950,256,9602,8192,512,ck,0,0,104.4272,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,771.33,1593.74,0.0 +gfx950,256,1809,7168,4608,ck,0,0,104.7631,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1140.7,642.4,0.0 +gfx950,256,6212,6144,1536,ck,0,0,104.9181,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1117.52,908.44,0.0 +gfx950,256,1808,7168,4608,ck,0,0,104.9555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1137.98,641.04,0.0 +gfx950,256,2112,7168,4096,ck,0,0,105.1064,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1179.92,649.71,0.0 +gfx950,256,2054,7168,4096,ck,0,0,105.1291,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1147.27,639.4,0.0 +gfx950,256,1817,7168,4608,ck,0,0,105.2551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1140.39,640.84,0.0 +gfx950,256,1816,7168,4608,ck,0,0,105.3179,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1139.08,640.28,0.0 +gfx950,256,2161,7168,4096,ck,0,0,105.4095,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1203.82,656.41,0.0 +gfx950,256,2163,7168,4096,ck,0,0,105.6676,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1201.99,655.15,0.0 +gfx950,256,2091,7168,4096,ck,0,0,105.6896,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1161.74,642.46,0.0 +gfx950,256,2085,7168,4096,ck,0,0,105.7208,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1158.07,641.23,0.0 +gfx950,256,2087,7168,4096,ck,0,0,105.7408,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1158.96,641.45,0.0 +gfx950,256,2146,7168,4096,ck,0,0,105.827,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1190.75,651.21,0.0 +gfx950,256,6341,6144,1536,ck,0,0,106.0037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1129.04,915.96,0.0 +gfx950,256,2164,7168,4096,ck,0,0,106.0607,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1198.09,652.9,0.0 +gfx950,256,2149,7168,4096,ck,0,0,106.0774,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1189.6,650.19,0.0 +gfx950,256,6307,6144,1536,ck,0,0,106.0825,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1122.15,910.85,0.0 +gfx950,256,2148,7168,4096,ck,0,0,106.088,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1188.93,649.95,0.0 +gfx950,256,2160,7168,4096,ck,0,0,106.1212,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1195.2,651.83,0.0 +gfx950,256,2109,7168,4096,ck,0,0,106.1515,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1166.64,642.79,0.0 +gfx950,256,1821,7168,4608,ck,0,0,106.2115,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1132.61,635.78,0.0 +gfx950,256,2088,7168,4096,ck,0,0,106.2181,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1154.3,638.74,0.0 +gfx950,256,6280,6144,1536,ck,0,0,106.2316,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1115.78,906.06,0.0 +gfx950,256,2086,7168,4096,ck,0,0,106.2355,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1153.01,638.29,0.0 +gfx950,256,2111,7168,4096,ck,0,0,106.3093,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1166.02,642.18,0.0 +gfx950,256,6146,6144,1536,ck,0,0,106.3417,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1090.84,887.7,0.0 +gfx950,256,2101,7168,4096,ck,0,0,106.3523,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1160.02,640.19,0.0 +gfx950,256,2056,7168,4096,ck,0,0,106.3727,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1134.96,632.27,0.0 +gfx950,256,2129,7168,4096,ck,0,0,106.4876,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1173.99,644.22,0.0 +gfx950,256,2126,7168,4096,ck,0,0,106.5367,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1171.8,643.41,0.0 +gfx950,256,2127,7168,4096,ck,0,0,106.6192,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1171.44,643.08,0.0 +gfx950,256,2128,7168,4096,ck,0,0,106.6743,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1171.39,642.92,0.0 +gfx950,256,2304,7168,4096,ck,0,0,106.7102,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1267.84,673.11,0.0 +gfx950,256,2130,7168,4096,ck,0,0,106.77,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1171.44,642.69,0.0 +gfx950,256,2150,7168,4096,ck,0,0,106.8415,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1181.64,645.71,0.0 +gfx950,256,2090,7168,4096,ck,0,0,106.8493,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1148.58,635.32,0.0 +gfx950,256,2166,7168,4096,ck,0,0,106.9455,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1189.28,647.84,0.0 +gfx950,256,6449,6144,1536,ck,0,0,107.107,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1136.44,920.46,0.0 +gfx950,256,1934,7168,4608,ck,0,0,107.1615,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1192.22,650.12,0.0 +gfx950,256,6225,6144,1536,ck,0,0,107.1793,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1096.23,890.95,0.0 +gfx950,256,2185,7168,4096,ck,0,0,107.3068,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1195.67,648.92,0.0 +gfx950,256,6580,6144,1536,ck,0,0,107.5313,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1154.95,933.67,0.0 +gfx950,256,2251,7168,4096,ck,0,0,107.5671,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1228.81,658.66,0.0 +gfx950,256,2217,7168,4096,ck,0,0,107.6667,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1209.13,652.23,0.0 +gfx950,256,2282,7168,4096,ck,0,0,107.6705,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1244.53,663.34,0.0 +gfx950,256,2248,7168,4096,ck,0,0,107.7229,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1225.4,657.2,0.0 +gfx950,256,2116,7168,4096,ck,0,0,107.7724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1152.91,634.32,0.0 +gfx950,256,6304,6144,1536,ck,0,0,107.8169,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1103.57,895.81,0.0 +gfx950,256,2207,7168,4096,ck,0,0,107.9264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1200.78,648.96,0.0 +gfx950,256,1987,7168,4608,ck,0,0,107.9427,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1216.03,654.72,0.0 +gfx950,256,2188,7168,4096,ck,0,0,107.9663,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1190.0,645.47,0.0 +gfx950,256,6435,6144,1536,ck,0,0,107.9721,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1124.89,911.3,0.0 +gfx950,256,2219,7168,4096,ck,0,0,108.016,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1206.31,650.47,0.0 +gfx950,256,2249,7168,4096,ck,0,0,108.0172,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1222.6,655.58,0.0 +gfx950,256,1978,7168,4608,ck,0,0,108.0175,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1209.69,652.68,0.0 +gfx950,256,2215,7168,4096,ck,0,0,108.1428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1202.72,649.02,0.0 +gfx950,256,2254,7168,4096,ck,0,0,108.1635,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1223.66,655.54,0.0 +gfx950,256,2301,7168,4096,ck,0,0,108.1765,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1249.03,663.47,0.0 +gfx950,256,6476,6144,1536,ck,0,0,108.1853,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1129.82,914.74,0.0 +gfx950,256,2000,7168,4608,ck,0,0,108.2816,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1220.16,654.94,0.0 +gfx950,256,1991,7168,4608,ck,0,0,108.2902,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1214.57,653.32,0.0 +gfx950,256,1944,7168,4608,ck,0,0,108.3047,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1185.74,645.01,0.0 +gfx950,256,2216,7168,4096,ck,0,0,108.3067,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1201.44,648.21,0.0 +gfx950,256,9911,8192,512,ck,18,0,108.3293,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_1x1_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,767.47,1584.53,0.0 +gfx950,256,6418,6144,1536,ck,0,0,108.3521,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1117.98,905.93,0.0 +gfx950,256,2235,7168,4096,ck,0,0,108.3611,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1211.13,651.12,0.0 +gfx950,256,2002,7168,4608,ck,0,0,108.3855,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1220.21,654.66,0.0 +gfx950,256,2189,7168,4096,ck,0,0,108.5021,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1184.67,642.46,0.0 +gfx950,256,2226,7168,4096,ck,0,0,108.5121,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1204.58,648.68,0.0 +gfx950,256,2012,7168,4608,ck,0,0,108.515,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1224.84,655.63,0.0 +gfx950,256,4898,8192,1536,ck,0,0,108.5736,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1135.29,924.3,0.0 +gfx950,256,2268,7168,4096,ck,0,0,108.6932,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1225.26,654.72,0.0 +gfx950,256,2252,7168,4096,ck,0,0,108.6975,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1216.57,651.98,0.0 +gfx950,256,2003,7168,4608,ck,0,0,108.7027,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1217.25,652.93,0.0 +gfx950,256,6583,6144,1536,ck,0,0,108.7029,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1143.02,923.99,0.0 +gfx950,256,1980,7168,4608,ck,0,0,108.7587,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1202.66,648.59,0.0 +gfx950,256,2037,7168,4608,ck,0,0,108.7684,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1237.17,658.45,0.0 +gfx950,256,2006,7168,4608,ck,0,0,108.8123,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1217.85,652.79,0.0 +gfx950,256,5004,8192,1536,ck,0,0,108.8688,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1156.71,939.25,0.0 +gfx950,256,5000,8192,1536,ck,0,0,108.8858,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1155.61,938.44,0.0 +gfx950,256,2186,7168,4096,ck,0,0,108.8864,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1178.87,639.68,0.0 +gfx950,256,2004,7168,4608,ck,0,0,108.9859,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1214.7,651.4,0.0 +gfx950,256,1968,7168,4608,ck,0,0,108.9996,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1192.73,645.07,0.0 +gfx950,256,2250,7168,4096,ck,0,0,109.4047,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1207.63,647.43,0.0 +gfx950,256,2032,7168,4608,ck,0,0,109.5251,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1225.6,653.04,0.0 +gfx950,256,2035,7168,4608,ck,0,0,109.5724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1226.88,653.28,0.0 +gfx950,256,2011,7168,4608,ck,0,0,109.8439,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1209.42,647.52,0.0 +gfx950,256,5002,8192,1536,ck,0,0,109.8498,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1145.92,930.53,0.0 +gfx950,256,2009,7168,4608,ck,0,0,109.9759,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1206.77,646.4,0.0 +gfx950,256,5003,8192,1536,ck,0,0,110.031,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1144.26,929.16,0.0 +gfx950,256,2041,7168,4608,ck,0,0,110.0915,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1224.7,651.23,0.0 +gfx950,256,5059,8192,1536,ck,0,0,110.1793,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1155.52,937.02,0.0 +gfx950,256,5006,8192,1536,ck,0,0,110.2596,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1142.58,927.72,0.0 +gfx950,256,5007,8192,1536,ck,0,0,110.4,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1141.35,926.71,0.0 +gfx950,256,5018,8192,1536,ck,0,0,110.7672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1140.07,925.41,0.0 +gfx950,256,6783,6144,1536,ck,0,0,110.9301,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1154.1,930.36,0.0 +gfx950,256,6886,6144,1536,ck,0,0,111.3705,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1167.0,939.47,0.0 +gfx950,256,2091,7168,4608,ck,0,0,112.2584,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1230.48,647.1,0.0 +gfx950,256,2126,7168,4608,ck,0,0,112.3112,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1250.49,652.7,0.0 +gfx950,256,2090,7168,4608,ck,0,0,112.3761,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1228.61,646.25,0.0 +gfx950,256,2101,7168,4608,ck,0,0,112.4049,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1234.76,647.94,0.0 +gfx950,256,2160,7168,4608,ck,0,0,112.4124,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1269.35,657.84,0.0 +gfx950,256,2087,7168,4608,ck,0,0,112.4429,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1226.11,645.36,0.0 +gfx950,256,2111,7168,4608,ck,0,0,112.6613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1237.81,648.15,0.0 +gfx950,256,2086,7168,4608,ck,0,0,112.6681,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1223.08,643.9,0.0 +gfx950,256,2054,7168,4608,ck,0,0,112.7412,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1203.53,638.11,0.0 +gfx950,256,2109,7168,4608,ck,0,0,112.8499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1234.57,646.73,0.0 +gfx950,256,2088,7168,4608,ck,0,0,112.8989,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1221.75,642.92,0.0 +gfx950,256,2085,7168,4608,ck,0,0,113.0416,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1218.45,641.61,0.0 +gfx950,256,2130,7168,4608,ck,0,0,113.0552,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1244.6,649.07,0.0 +gfx950,256,2056,7168,4608,ck,0,0,113.1989,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1199.83,635.86,0.0 +gfx950,256,2128,7168,4608,ck,0,0,113.2912,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1240.84,647.38,0.0 +gfx950,256,2129,7168,4608,ck,0,0,113.318,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1241.13,647.4,0.0 +gfx950,256,2166,7168,4608,ck,0,0,113.3613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1262.22,653.33,0.0 +gfx950,256,2163,7168,4608,ck,0,0,113.4845,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1259.1,652.12,0.0 +gfx950,256,2150,7168,4608,ck,0,0,113.524,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1251.1,649.73,0.0 +gfx950,256,2149,7168,4608,ck,0,0,113.5631,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1250.09,649.34,0.0 +gfx950,256,2148,7168,4608,ck,0,0,113.6059,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1249.03,648.93,0.0 +gfx950,256,2161,7168,4608,ck,0,0,113.6889,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1255.67,650.62,0.0 +gfx950,256,6859,6144,1536,ck,0,0,113.7545,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1138.06,916.5,0.0 +gfx950,256,2164,7168,4608,ck,0,0,113.9192,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1254.88,649.8,0.0 +gfx950,256,2112,7168,4608,ck,0,0,113.9212,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1224.7,641.14,0.0 +gfx950,256,6856,6144,1536,ck,0,0,113.9269,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1135.84,914.75,0.0 +gfx950,256,6986,6144,1536,ck,0,0,113.9473,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1157.17,930.36,0.0 +gfx950,256,2116,7168,4608,ck,0,0,114.052,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1225.61,641.07,0.0 +gfx950,256,2127,7168,4608,ck,0,0,114.1444,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1230.99,642.38,0.0 +gfx950,256,2146,7168,4608,ck,0,0,114.2424,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1240.92,644.98,0.0 +gfx950,256,2216,7168,4608,ck,0,0,114.6209,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1277.16,654.42,0.0 +gfx950,256,6884,6144,1536,ck,0,0,114.6833,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1132.96,912.09,0.0 +gfx950,256,6819,6144,1536,ck,0,0,114.7918,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1121.2,903.4,0.0 +gfx950,256,2185,7168,4608,ck,0,0,114.8273,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1257.03,648.13,0.0 +gfx950,256,2188,7168,4608,ck,0,0,114.9788,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1257.1,647.77,0.0 +gfx950,256,6809,6144,1536,ck,0,0,114.9919,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1117.61,900.63,0.0 +gfx950,256,2215,7168,4608,ck,0,0,115.1176,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1271.08,651.43,0.0 +gfx950,256,2251,7168,4608,ck,0,0,115.1292,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1291.61,657.29,0.0 +gfx950,256,2252,7168,4608,ck,0,0,115.2608,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1290.71,656.7,0.0 +gfx950,256,6926,6144,1536,ck,0,0,115.2811,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1133.96,912.4,0.0 +gfx950,256,2254,7168,4608,ck,0,0,115.3712,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1290.62,656.4,0.0 +gfx950,256,6812,6144,1536,ck,0,0,115.3825,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1114.31,897.94,0.0 +gfx950,256,6826,6144,1536,ck,0,0,115.4461,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1115.99,899.12,0.0 +gfx950,256,2301,7168,4608,ck,0,0,115.4994,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1316.07,663.38,0.0 +gfx950,256,6814,6144,1536,ck,0,0,115.5468,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1113.06,896.9,0.0 +gfx950,256,7054,6144,1536,ck,0,0,115.5666,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1152.06,925.45,0.0 +gfx950,256,6880,6144,1536,ck,0,0,115.6493,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1122.84,903.99,0.0 +gfx950,256,2186,7168,4608,ck,0,0,115.6672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1248.48,643.59,0.0 +gfx950,256,5124,8192,1536,ck,0,0,115.6721,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1114.79,902.59,0.0 +gfx950,256,6931,6144,1536,ck,0,0,115.6737,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1130.92,909.9,0.0 +gfx950,256,2217,7168,4608,ck,0,0,115.6992,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1265.83,648.48,0.0 +gfx950,256,2249,7168,4608,ck,0,0,115.7157,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1283.92,653.63,0.0 +gfx950,256,7247,6144,1536,ck,0,0,115.7498,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1181.71,947.04,0.0 +gfx950,256,6811,6144,1536,ck,0,0,115.8083,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1110.05,894.52,0.0 +gfx950,256,2282,7168,4608,ck,0,0,115.9047,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1300.63,657.96,0.0 +gfx950,256,6858,6144,1536,ck,0,0,115.9193,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1116.64,899.27,0.0 +gfx950,256,6827,6144,1536,ck,0,0,115.9221,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1111.57,895.55,0.0 +gfx950,256,5126,8192,1536,ck,0,0,115.9277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1112.76,900.91,0.0 +gfx950,256,2207,7168,4608,ck,0,0,115.9512,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1257.38,645.44,0.0 +gfx950,256,2250,7168,4608,ck,0,0,116.0633,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1280.64,651.84,0.0 +gfx950,256,5273,8192,1536,ck,0,0,116.2429,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1141.57,921.13,0.0 +gfx950,256,2226,7168,4608,ck,0,0,116.2591,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1264.85,646.83,0.0 +gfx950,256,7053,6144,1536,ck,0,0,116.303,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1144.6,919.48,0.0 +gfx950,256,2219,7168,4608,ck,0,0,116.3774,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1259.59,645.03,0.0 +gfx950,256,5272,8192,1536,ck,0,0,116.3893,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1139.92,919.82,0.0 +gfx950,256,5258,8192,1536,ck,0,0,116.4089,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1136.7,917.51,0.0 +gfx950,256,5269,8192,1536,ck,0,0,116.4225,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1138.94,919.1,0.0 +gfx950,256,2304,7168,4608,ck,0,0,116.4528,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1306.99,658.44,0.0 +gfx950,256,2248,7168,4608,ck,0,0,116.5335,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1274.34,648.88,0.0 +gfx950,256,2235,7168,4608,ck,0,0,116.6248,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1265.98,646.26,0.0 +gfx950,256,6925,6144,1536,ck,0,0,116.6701,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1120.3,901.42,0.0 +gfx950,256,7376,6144,1536,ck,0,0,116.7107,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1192.84,954.52,0.0 +gfx950,256,6817,6144,1536,ck,0,0,116.7281,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1102.28,888.18,0.0 +gfx950,256,5292,8192,1536,ck,0,0,116.8706,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1139.53,919.1,0.0 +gfx950,256,7000,6144,1536,ck,0,0,116.8713,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1130.48,908.74,0.0 +gfx950,256,7050,6144,1536,ck,0,0,116.9085,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1138.19,914.36,0.0 +gfx950,256,2268,7168,4608,ck,0,0,116.9586,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1281.01,649.76,0.0 +gfx950,256,6818,6144,1536,ck,0,0,117.0074,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1099.81,886.18,0.0 +gfx950,256,5281,8192,1536,ck,0,0,117.0153,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1135.76,916.28,0.0 +gfx950,256,5276,8192,1536,ck,0,0,117.0172,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1134.66,915.5,0.0 +gfx950,256,6813,6144,1536,ck,0,0,117.0689,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1098.42,885.12,0.0 +gfx950,256,7233,6144,1536,ck,0,0,117.2017,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1164.82,933.66,0.0 +gfx950,256,5302,8192,1536,ck,0,0,117.2225,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1138.26,917.87,0.0 +gfx950,256,5368,8192,1536,ck,0,0,117.2405,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1152.25,927.81,0.0 +gfx950,256,5283,8192,1536,ck,0,0,117.3065,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1133.36,914.31,0.0 +gfx950,256,5264,8192,1536,ck,0,0,117.4853,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1127.57,910.02,0.0 +gfx950,256,5295,8192,1536,ck,0,0,117.5085,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1133.99,914.57,0.0 +gfx950,256,2189,7168,4608,ck,0,0,117.5134,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1230.55,633.96,0.0 +gfx950,256,5285,8192,1536,ck,0,0,117.5218,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1131.72,912.94,0.0 +gfx950,256,5251,8192,1536,ck,0,0,117.7837,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1121.94,905.74,0.0 +gfx950,256,5277,8192,1536,ck,0,0,117.8109,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1127.23,909.48,0.0 +gfx950,256,7001,6144,1536,ck,0,0,117.8457,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1121.29,901.34,0.0 +gfx950,256,7009,6144,1536,ck,0,0,117.8481,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1122.55,902.26,0.0 +gfx950,256,7287,6144,1536,ck,0,0,117.8945,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1166.62,934.5,0.0 +gfx950,256,5275,8192,1536,ck,0,0,118.0598,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1124.43,907.26,0.0 +gfx950,256,7418,6144,1536,ck,0,0,118.1865,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1184.65,947.52,0.0 +gfx950,256,7246,6144,1536,ck,0,0,118.1996,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1157.06,927.29,0.0 +gfx950,256,6815,6144,1536,ck,0,0,118.3842,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1086.54,875.52,0.0 +gfx950,256,5447,8192,1536,ck,0,0,118.4537,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1157.23,930.26,0.0 +gfx950,256,5489,8192,1536,ck,0,0,118.756,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1163.19,934.23,0.0 +gfx950,256,7241,6144,1536,ck,0,0,118.9359,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1149.1,920.97,0.0 +gfx950,256,5431,8192,1536,ck,0,0,119.0165,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1148.38,923.46,0.0 +gfx950,256,5540,8192,1536,ck,0,0,119.1461,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1170.15,938.84,0.0 +gfx950,256,7238,6144,1536,ck,0,0,119.4147,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1144.02,916.93,0.0 +gfx950,256,5440,8192,1536,ck,0,0,119.7093,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1143.62,919.46,0.0 +gfx950,256,7242,6144,1536,ck,0,0,119.7546,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1141.4,914.79,0.0 +gfx950,256,7289,6144,1536,ck,0,0,119.8428,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1147.96,919.54,0.0 +gfx950,256,7528,6144,1536,ck,0,0,122.8386,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1156.69,924.01,0.0 +gfx950,256,2344,7168,4096,ck,0,0,123.5507,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1114.04,587.33,0.0 +gfx950,256,2390,7168,4096,ck,0,0,123.7846,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1133.76,593.07,0.0 +gfx950,256,2403,7168,4096,ck,0,0,123.9217,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1138.66,594.34,0.0 +gfx950,256,2358,7168,4096,ck,0,0,124.1341,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1115.43,586.65,0.0 +gfx950,256,2368,7168,4096,ck,0,0,124.138,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1120.12,588.11,0.0 +gfx950,256,2378,7168,4096,ck,0,0,124.2133,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1124.17,589.24,0.0 +gfx950,256,2387,7168,4096,ck,0,0,124.3866,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1126.85,589.75,0.0 +gfx950,256,902,9216,7168,ck,0,0,124.4168,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,957.85,716.56,0.0 +gfx950,256,2365,7168,4096,ck,0,0,124.5304,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1115.18,585.82,0.0 +gfx950,256,2464,7168,4096,ck,0,0,124.5317,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1161.85,600.46,0.0 +gfx950,256,7440,6144,1536,ck,0,0,124.5318,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1127.63,901.68,0.0 +gfx950,256,2369,7168,4096,ck,0,0,124.6327,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1116.15,585.93,0.0 +gfx950,256,2412,7168,4096,ck,0,0,124.7214,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1135.6,591.86,0.0 +gfx950,256,2355,7168,4096,ck,0,0,124.7783,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1108.26,583.17,0.0 +gfx950,256,2356,7168,4096,ck,0,0,124.9423,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1107.27,582.56,0.0 +gfx950,256,7491,6144,1536,ck,0,0,124.9782,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1131.3,904.1,0.0 +gfx950,256,2445,7168,4096,ck,0,0,125.0868,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1147.77,595.0,0.0 +gfx950,256,2441,7168,4096,ck,0,0,125.2899,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1144.04,593.44,0.0 +gfx950,256,2415,7168,4096,ck,0,0,125.4019,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1130.84,589.09,0.0 +gfx950,256,2443,7168,4096,ck,0,0,125.741,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1140.87,591.61,0.0 +gfx950,256,920,9216,7168,ck,0,0,125.7492,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,966.61,712.63,0.0 +gfx950,256,7536,6144,1536,ck,0,0,125.8336,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1130.36,902.9,0.0 +gfx950,256,2486,7168,4096,ck,0,0,125.8445,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1159.99,597.42,0.0 +gfx950,256,5670,8192,1536,ck,0,0,125.8787,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1133.55,907.14,0.0 +gfx950,256,7539,6144,1536,ck,0,0,125.8901,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1130.3,902.82,0.0 +gfx950,256,7844,6144,1536,ck,0,0,125.9425,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1175.54,935.92,0.0 +gfx950,256,2534,7168,4096,ck,0,0,125.9953,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1180.97,603.73,0.0 +gfx950,256,2479,7168,4096,ck,0,0,126.1111,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1154.28,595.13,0.0 +gfx950,256,2515,7168,4096,ck,0,0,126.1446,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1170.73,600.24,0.0 +gfx950,256,2444,7168,4096,ck,0,0,126.4812,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1134.65,588.29,0.0 +gfx950,256,930,9216,7168,ck,0,0,126.5447,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,970.98,710.17,0.0 +gfx950,256,7838,6144,1536,ck,0,0,126.5958,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1168.58,930.44,0.0 +gfx950,256,7834,6144,1536,ck,0,0,126.7098,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1166.93,929.17,0.0 +gfx950,256,2548,7168,4096,ck,0,0,126.7367,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1180.55,602.23,0.0 +gfx950,256,2544,7168,4096,ck,0,0,126.9542,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1176.68,600.62,0.0 +gfx950,256,7780,6144,1536,ck,0,0,126.9694,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1156.52,921.39,0.0 +gfx950,256,7656,6144,1536,ck,0,0,127.0138,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1137.69,907.57,0.0 +gfx950,256,910,9216,7168,ck,0,0,127.0161,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,946.57,703.5,0.0 +gfx950,256,8019,6144,1536,ck,0,0,127.1918,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1189.96,945.75,0.0 +gfx950,256,8016,6144,1536,ck,0,0,127.2722,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1188.77,944.83,0.0 +gfx950,256,5890,8192,1536,ck,0,0,127.5055,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1162.51,926.48,0.0 +gfx950,256,7966,6144,1536,ck,0,0,127.5995,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1178.32,936.99,0.0 +gfx950,256,7963,6144,1536,ck,0,0,127.8704,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1175.38,934.68,0.0 +gfx950,256,8055,6144,1536,ck,0,0,127.8925,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1188.76,944.46,0.0 +gfx950,256,6008,8192,1536,ck,0,0,128.1915,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1179.46,938.02,0.0 +gfx950,256,6002,8192,1536,ck,0,0,128.4072,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1176.3,935.61,0.0 +gfx950,256,7809,6144,1536,ck,0,0,128.4506,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1147.44,913.88,0.0 +gfx950,256,8004,6144,1536,ck,0,0,128.5127,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1175.53,934.42,0.0 +gfx950,256,7871,6144,1536,ck,0,0,128.6146,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1155.08,919.38,0.0 +gfx950,256,2597,7168,4096,ck,0,0,128.7191,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1184.72,599.97,0.0 +gfx950,256,2594,7168,4096,ck,0,0,128.7211,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1183.34,599.53,0.0 +gfx950,256,6001,8192,1536,ck,0,0,128.8872,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1171.72,931.98,0.0 +gfx950,256,1003,9216,7168,ck,0,0,128.912,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1027.96,711.63,0.0 +gfx950,256,1000,9216,7168,ck,0,0,129.0453,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1023.83,710.3,0.0 +gfx950,256,7990,6144,1536,ck,0,0,129.1133,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1168.01,928.57,0.0 +gfx950,256,8011,6144,1536,ck,0,0,129.1265,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1170.96,930.72,0.0 +gfx950,256,1007,9216,7168,ck,0,0,129.145,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1030.2,711.13,0.0 +gfx950,256,8122,6144,1536,ck,0,0,129.1891,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1186.61,942.15,0.0 +gfx950,256,8046,6144,1536,ck,0,0,129.2153,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1175.27,933.83,0.0 +gfx950,256,2593,7168,4096,ck,0,0,129.3358,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1177.26,596.54,0.0 +gfx950,256,8015,6144,1536,ck,0,0,129.3573,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1169.46,929.49,0.0 +gfx950,256,8051,6144,1536,ck,0,0,129.481,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1173.59,932.45,0.0 +gfx950,256,8018,6144,1536,ck,0,0,129.5958,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1167.74,928.1,0.0 +gfx950,256,2567,7168,4096,ck,0,0,129.6417,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1162.7,591.44,0.0 +gfx950,256,6026,8192,1536,ck,0,0,129.7323,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1168.94,929.37,0.0 +gfx950,256,2570,7168,4096,ck,0,0,129.7339,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1163.24,591.44,0.0 +gfx950,256,2636,7168,4096,ck,0,0,129.7477,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1192.98,600.76,0.0 +gfx950,256,8040,6144,1536,ck,0,0,129.8253,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1168.88,928.8,0.0 +gfx950,256,8048,6144,1536,ck,0,0,129.8278,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1170.02,929.64,0.0 +gfx950,256,1009,9216,7168,ck,0,0,129.8497,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1026.65,707.67,0.0 +gfx950,256,8010,6144,1536,ck,0,0,129.8773,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1164.05,925.24,0.0 +gfx950,256,8001,6144,1536,ck,0,0,129.9859,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1161.77,923.51,0.0 +gfx950,256,8112,6144,1536,ck,0,0,130.0391,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1177.41,934.93,0.0 +gfx950,256,8047,6144,1536,ck,0,0,130.0439,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1167.93,927.99,0.0 +gfx950,256,8123,6144,1536,ck,0,0,130.0654,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1178.76,935.91,0.0 +gfx950,256,8043,6144,1536,ck,0,0,130.2366,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1165.62,926.19,0.0 +gfx950,256,8192,6144,1536,ck,0,0,130.3155,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1186.5,941.43,0.0 +gfx950,256,2658,7168,4096,ck,0,0,130.3727,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1197.17,600.99,0.0 +gfx950,256,8007,6144,1536,ck,0,0,130.3799,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1159.13,921.35,0.0 +gfx950,256,1001,9216,7168,ck,0,0,130.4813,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1013.58,702.67,0.0 +gfx950,256,8008,6144,1536,ck,0,0,130.4931,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1158.27,920.66,0.0 +gfx950,256,8104,6144,1536,ck,0,0,130.8571,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1168.89,928.24,0.0 +gfx950,256,1023,9216,7168,ck,0,0,130.9864,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1031.86,704.26,0.0 +gfx950,256,8103,6144,1536,ck,0,0,131.0039,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1167.44,927.09,0.0 +gfx950,256,8120,6144,1536,ck,0,0,131.0229,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1169.72,928.75,0.0 +gfx950,256,1015,9216,7168,ck,0,0,131.1045,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1022.87,702.07,0.0 +gfx950,256,6107,8192,1536,ck,0,0,131.2567,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1170.89,929.63,0.0 +gfx950,256,6108,8192,1536,ck,0,0,131.2611,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1171.05,929.74,0.0 +gfx950,256,2713,7168,4096,ck,0,0,131.369,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1212.68,604.15,0.0 +gfx950,256,2748,7168,4096,ck,0,0,131.5078,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1227.02,608.41,0.0 +gfx950,256,2356,7168,4608,ck,0,0,131.6086,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1182.58,590.1,0.0 +gfx950,256,2734,7168,4096,ck,0,0,131.867,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1217.45,604.8,0.0 +gfx950,256,2794,7168,4096,ck,0,0,131.9226,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1243.64,612.93,0.0 +gfx950,256,8182,6144,1536,ck,0,0,132.1127,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1168.93,927.58,0.0 +gfx950,256,2387,7168,4608,ck,0,0,132.1586,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1193.16,592.09,0.0 +gfx950,256,1047,9216,7168,ck,0,0,132.1875,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1046.47,702.51,0.0 +gfx950,256,2390,7168,4608,ck,0,0,132.2694,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1193.66,592.02,0.0 +gfx950,256,1040,9216,7168,ck,0,0,132.3047,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1038.55,700.54,0.0 +gfx950,256,1087,9216,7168,ck,0,0,132.3223,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1085.34,709.54,0.0 +gfx950,256,8090,6144,1536,ck,0,0,132.3463,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1153.74,916.33,0.0 +gfx950,256,2747,7168,4096,ck,0,0,132.3494,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1218.78,604.41,0.0 +gfx950,256,2355,7168,4608,ck,0,0,132.6046,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1173.2,585.52,0.0 +gfx950,256,2403,7168,4608,ck,0,0,132.6199,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1196.98,592.31,0.0 +gfx950,256,2740,7168,4096,ck,0,0,132.7126,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1212.35,601.78,0.0 +gfx950,256,2358,7168,4608,ck,0,0,132.7797,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1173.15,585.18,0.0 +gfx950,256,2368,7168,4608,ck,0,0,132.8104,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1177.85,586.47,0.0 +gfx950,256,2365,7168,4608,ck,0,0,132.8652,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1175.87,585.8,0.0 +gfx950,256,2344,7168,4608,ck,0,0,132.8885,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1165.23,582.71,0.0 +gfx950,256,2729,7168,4096,ck,0,0,132.9349,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1205.46,599.25,0.0 +gfx950,256,2369,7168,4608,ck,0,0,133.064,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1176.1,585.5,0.0 +gfx950,256,1112,9216,7168,ck,0,0,133.127,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1103.59,710.05,0.0 +gfx950,256,2415,7168,4608,ck,0,0,133.1582,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1198.09,591.63,0.0 +gfx950,256,2378,7168,4608,ck,0,0,133.2459,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1178.96,585.98,0.0 +gfx950,256,2412,7168,4608,ck,0,0,133.3058,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1195.28,590.54,0.0 +gfx950,256,2757,7168,4096,ck,0,0,133.5538,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1212.18,600.34,0.0 +gfx950,256,1132,9216,7168,ck,0,0,133.6591,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1118.97,711.06,0.0 +gfx950,256,2486,7168,4608,ck,0,0,133.6902,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1228.41,599.33,0.0 +gfx950,256,2444,7168,4608,ck,0,0,133.9503,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1205.31,592.23,0.0 +gfx950,256,1144,9216,7168,ck,0,0,134.0889,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1127.21,711.07,0.0 +gfx950,256,2443,7168,4608,ck,0,0,134.2122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1202.46,590.93,0.0 +gfx950,256,2441,7168,4608,ck,0,0,134.3866,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1199.92,589.88,0.0 +gfx950,256,2464,7168,4608,ck,0,0,134.4398,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1210.75,592.89,0.0 +gfx950,256,2445,7168,4608,ck,0,0,134.5615,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1200.32,589.68,0.0 +gfx950,256,2479,7168,4608,ck,0,0,134.734,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1215.46,593.71,0.0 +gfx950,256,2515,7168,4608,ck,0,0,134.878,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1231.79,598.13,0.0 +gfx950,256,2548,7168,4608,ck,0,0,135.0864,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1246.03,601.83,0.0 +gfx950,256,2544,7168,4608,ck,0,0,135.2985,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1242.12,600.33,0.0 +gfx950,256,2534,7168,4608,ck,0,0,135.5888,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1234.59,597.65,0.0 +gfx950,256,6146,8192,1536,ck,0,0,135.8508,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1138.52,903.34,0.0 +gfx950,256,2658,7168,4608,ck,0,0,136.0247,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1290.86,613.0,0.0 +gfx950,256,6225,8192,1536,ck,0,0,136.0444,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1151.52,912.46,0.0 +gfx950,256,20480,512,7168,ck,0,0,136.4239,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1101.89,1256.69,0.0 +gfx950,256,6307,8192,1536,ck,0,0,136.53,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1162.53,919.98,0.0 +gfx950,256,6304,8192,1536,ck,0,0,136.672,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1160.77,918.63,0.0 +gfx950,256,2597,7168,4608,ck,0,0,136.8528,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1253.6,600.85,0.0 +gfx950,256,2570,7168,4608,ck,0,0,136.8818,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1240.3,596.98,0.0 +gfx950,256,2594,7168,4608,ck,0,0,136.8878,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1251.83,600.28,0.0 +gfx950,256,2567,7168,4608,ck,0,0,137.349,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1234.64,594.54,0.0 +gfx950,256,2636,7168,4608,ck,0,0,137.5534,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1265.94,603.16,0.0 +gfx950,256,6418,8192,1536,ck,0,0,138.1404,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1169.2,923.65,0.0 +gfx950,256,2593,7168,4608,ck,0,0,138.225,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1239.24,594.33,0.0 +gfx950,256,6583,8192,1536,ck,0,0,138.5772,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1195.48,942.08,0.0 +gfx950,256,6476,8192,1536,ck,0,0,138.6112,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1175.76,928.01,0.0 +gfx950,256,6449,8192,1536,ck,0,0,138.7099,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1170.03,923.86,0.0 +gfx950,256,2734,7168,4608,ck,0,0,139.1499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1297.94,609.58,0.0 +gfx950,256,2757,7168,4608,ck,0,0,139.2523,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1307.9,612.26,0.0 +gfx950,256,2740,7168,4608,ck,0,0,139.4618,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1297.88,609.03,0.0 +gfx950,256,2747,7168,4608,ck,0,0,139.4888,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1300.95,609.86,0.0 +gfx950,256,2748,7168,4608,ck,0,0,139.6964,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1299.49,609.09,0.0 +gfx950,256,2843,7168,4096,ck,14,0,140.3695,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1189.3,582.48,0.0 +gfx950,256,2729,7168,4608,ck,0,0,140.4398,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1283.67,603.31,0.0 +gfx950,256,2713,7168,4608,ck,0,0,140.5242,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1275.38,600.79,0.0 +gfx950,256,2841,7168,4096,ck,14,0,140.6828,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1185.82,580.92,0.0 +gfx950,256,2794,7168,4608,ck,0,0,140.6979,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1311.84,610.95,0.0 +gfx950,256,2861,7168,4096,ck,14,0,142.0283,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1182.85,578.01,0.0 +gfx950,256,2876,7168,4096,ck,14,0,143.0707,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1180.39,575.73,0.0 +gfx950,256,1185,9216,7168,ck,0,0,143.868,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1088.24,670.03,0.0 +gfx950,256,6811,8192,1536,ck,0,0,145.556,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1177.58,924.98,0.0 +gfx950,256,6813,8192,1536,ck,0,0,145.8084,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1175.89,923.62,0.0 +gfx950,256,3019,7168,4096,ck,0,0,146.0218,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1214.04,582.15,0.0 +gfx950,256,2889,7168,4096,ck,0,0,146.1931,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1160.4,565.08,0.0 +gfx950,256,6819,8192,1536,ck,0,0,146.3141,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1172.86,921.16,0.0 +gfx950,256,3000,7168,4096,ck,0,0,146.4856,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1202.58,577.91,0.0 +gfx950,256,2952,7168,4096,ck,0,0,146.516,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1183.09,571.76,0.0 +gfx950,256,6812,8192,1536,ck,0,0,146.5188,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1170.02,919.02,0.0 +gfx950,256,2918,7168,4096,ck,0,0,146.5408,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1169.27,567.38,0.0 +gfx950,256,6814,8192,1536,ck,0,0,146.5616,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1170.02,919.0,0.0 +gfx950,256,1255,9216,7168,ck,14,0,146.5647,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1131.32,669.93,0.0 +gfx950,256,3004,7168,4096,ck,0,0,146.8105,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1201.52,577.14,0.0 +gfx950,256,6809,8192,1536,ck,0,0,146.8876,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1166.57,916.35,0.0 +gfx950,256,2947,7168,4096,ck,0,0,146.898,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1178.02,569.64,0.0 +gfx950,256,6818,8192,1536,ck,0,0,147.0629,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1166.72,916.35,0.0 +gfx950,256,1265,9216,7168,ck,14,0,147.1173,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1136.05,669.16,0.0 +gfx950,256,3005,7168,4096,ck,0,0,147.3628,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1197.41,575.1,0.0 +gfx950,256,7001,8192,1536,ck,0,0,147.4794,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1194.65,936.0,0.0 +gfx950,256,2964,7168,4096,ck,0,0,147.5256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1179.77,569.34,0.0 +gfx950,256,6815,8192,1536,ck,0,0,147.5994,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1161.96,912.66,0.0 +gfx950,256,2946,7168,4096,ck,0,0,147.9252,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1169.44,565.56,0.0 +gfx950,256,6817,8192,1536,ck,0,0,148.0345,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1158.89,910.22,0.0 +gfx950,256,7009,8192,1536,ck,0,0,148.1551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1190.56,932.7,0.0 +gfx950,256,2955,7168,4096,ck,0,0,148.1796,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1171.0,565.71,0.0 +gfx950,256,7000,8192,1536,ck,0,0,148.358,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1187.4,930.34,0.0 +gfx950,256,7050,8192,1536,ck,0,0,149.0829,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1190.07,931.82,0.0 +gfx950,256,2843,7168,4608,ck,14,0,151.2196,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1241.96,574.58,0.0 +gfx950,256,3258,7168,4096,ck,0,0,151.8289,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1260.04,588.9,0.0 +gfx950,256,2841,7168,4608,ck,14,0,151.8544,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1235.9,571.93,0.0 +gfx950,256,2876,7168,4608,ck,0,0,153.0816,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1241.1,571.68,0.0 +gfx950,256,2889,7168,4608,ck,0,0,153.408,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1244.06,572.07,0.0 +gfx950,256,2861,7168,4608,ck,14,0,153.53,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1231.02,568.16,0.0 +gfx950,256,2947,7168,4608,ck,0,0,154.0906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1263.41,576.66,0.0 +gfx950,256,7241,8192,1536,ck,0,0,154.2533,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1181.34,922.78,0.0 +gfx950,256,7238,8192,1536,ck,0,0,154.3396,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1180.19,921.91,0.0 +gfx950,256,7242,8192,1536,ck,0,0,154.4622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1179.91,921.65,0.0 +gfx950,256,3000,7168,4608,ck,0,0,154.5837,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1282.03,581.32,0.0 +gfx950,256,2955,7168,4608,ck,0,0,154.5886,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1262.76,575.78,0.0 +gfx950,256,7440,8192,1536,ck,0,0,154.5935,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1211.14,943.82,0.0 +gfx950,256,2946,7168,4608,ck,0,0,154.6949,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1258.05,574.29,0.0 +gfx950,256,3005,7168,4608,ck,0,0,154.9148,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1281.42,580.69,0.0 +gfx950,256,2918,7168,4608,ck,0,0,154.9837,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1243.77,569.79,0.0 +gfx950,256,3451,7168,4096,ck,0,0,155.0927,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1306.6,599.44,0.0 +gfx950,256,3004,7168,4608,ck,0,0,155.4227,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1276.81,578.67,0.0 +gfx950,256,2952,7168,4608,ck,0,0,155.4737,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1254.3,572.14,0.0 +gfx950,256,3019,7168,4608,ck,0,0,155.6538,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1281.28,579.63,0.0 +gfx950,256,7289,8192,1536,ck,0,0,156.5144,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1171.99,914.94,0.0 +gfx950,256,2964,7168,4608,ck,0,0,156.5277,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1250.91,569.74,0.0 +gfx950,256,1333,9216,7168,ck,14,0,156.9715,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1121.97,638.24,0.0 +gfx950,256,1334,9216,7168,ck,14,0,157.4529,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1119.38,636.45,0.0 +gfx950,256,7536,8192,1536,ck,0,0,157.4856,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1204.23,937.41,0.0 +gfx950,256,7539,8192,1536,ck,0,0,157.5731,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1204.05,937.23,0.0 +gfx950,256,7656,8192,1536,ck,0,0,157.6846,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1221.87,949.86,0.0 +gfx950,256,3258,7168,4608,ck,0,0,161.0546,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1336.34,588.31,0.0 +gfx950,256,3451,7168,4608,ck,0,0,163.9306,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1390.67,600.29,0.0 +gfx950,256,8001,8192,1536,ck,0,0,164.1964,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1226.29,949.84,0.0 +gfx950,256,8015,8192,1536,ck,0,0,165.0449,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1222.12,946.48,0.0 +gfx950,256,7809,8192,1536,ck,0,0,165.2369,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1189.32,923.04,0.0 +gfx950,256,8048,8192,1536,ck,0,0,165.256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1225.58,948.85,0.0 +gfx950,256,8047,8192,1536,ck,0,0,165.4053,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1224.32,947.88,0.0 +gfx950,256,8018,8192,1536,ck,0,0,165.6303,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1218.25,943.46,0.0 +gfx950,256,8007,8192,1536,ck,0,0,165.6697,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1216.29,942.05,0.0 +gfx950,256,7871,8192,1536,ck,0,0,166.1381,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1192.26,924.72,0.0 +gfx950,256,8051,8192,1536,ck,0,0,166.2135,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1218.97,943.71,0.0 +gfx950,256,8010,8192,1536,ck,0,0,166.4064,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1211.36,938.2,0.0 +gfx950,256,8008,8192,1536,ck,0,0,166.7702,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1208.42,935.94,0.0 +gfx950,256,8043,8192,1536,ck,0,0,167.0626,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1211.57,938.05,0.0 +gfx950,256,8040,8192,1536,ck,0,0,167.3334,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1209.16,936.21,0.0 +gfx950,256,7990,8192,1536,ck,0,0,167.5026,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1200.43,929.92,0.0 +gfx950,256,1490,9216,7168,ck,0,0,169.3577,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1162.39,615.29,0.0 +gfx950,256,1517,9216,7168,ck,0,0,169.9488,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1179.34,617.22,0.0 +gfx950,256,1485,9216,7168,ck,0,0,170.3247,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1151.91,611.05,0.0 +gfx950,256,3620,7168,4096,ck,0,0,171.6563,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1238.33,559.75,0.0 +gfx950,256,8090,8192,1536,ck,0,0,171.8302,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1184.84,916.93,0.0 +gfx950,256,3617,7168,4096,ck,0,0,172.4877,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1231.34,556.73,0.0 +gfx950,256,3632,7168,4096,ck,0,0,173.3084,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1230.59,555.69,0.0 +gfx950,256,3587,7168,4096,ck,0,0,173.5109,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1213.93,550.26,0.0 +gfx950,256,3655,7168,4096,ck,0,0,173.748,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1235.25,556.72,0.0 +gfx950,256,8182,8192,1536,ck,0,0,173.933,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1183.83,915.32,0.0 +gfx950,256,1580,9216,7168,ck,0,0,174.5725,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1195.78,610.11,0.0 +gfx950,256,3831,7168,4096,ck,0,0,174.6234,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1288.24,572.51,0.0 +gfx950,256,1623,9216,7168,ck,0,0,174.9197,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1225.89,615.19,0.0 +gfx950,256,1548,9216,7168,ck,0,0,175.6106,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1164.64,601.84,0.0 +gfx950,256,1590,9216,7168,ck,0,0,175.7698,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1195.15,607.41,0.0 +gfx950,256,1553,9216,7168,ck,0,0,175.8667,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1166.7,601.69,0.0 +gfx950,256,1599,9216,7168,ck,0,0,176.0069,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1200.3,607.9,0.0 +gfx950,256,1639,9216,7168,ck,0,0,176.3435,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1227.98,612.55,0.0 +gfx950,256,3840,7168,4096,ck,0,0,176.8292,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1275.16,566.3,0.0 +gfx950,256,3749,7168,4096,ck,0,0,177.004,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1243.71,556.27,0.0 +gfx950,256,3824,7168,4096,ck,0,0,177.2754,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1266.65,563.21,0.0 +gfx950,256,1601,9216,7168,ck,0,0,177.4687,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1191.9,603.18,0.0 +gfx950,256,1710,9216,7168,ck,0,0,178.5256,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1265.51,615.24,0.0 +gfx950,256,1718,9216,7168,ck,0,0,179.3081,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1265.88,613.7,0.0 +gfx950,256,4000,7168,4096,ck,0,0,179.8231,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1306.18,573.28,0.0 +gfx950,256,3894,7168,4096,ck,0,0,179.8646,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1271.27,562.28,0.0 +gfx950,256,3971,7168,4096,ck,0,0,180.5249,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1291.67,568.09,0.0 +gfx950,256,4050,7168,4096,ck,0,0,180.9551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1314.23,574.78,0.0 +gfx950,256,3925,7168,4096,ck,0,0,181.0343,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1273.11,561.8,0.0 +gfx950,256,4002,7168,4096,ck,0,0,181.8983,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1291.92,566.94,0.0 +gfx950,256,1685,9216,7168,ck,0,0,181.9633,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1223.45,600.1,0.0 +gfx950,256,4046,7168,4096,ck,0,0,181.9651,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1305.65,571.19,0.0 +gfx950,256,3587,7168,4608,ck,0,0,183.5431,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1291.02,550.18,0.0 +gfx950,256,3620,7168,4608,ck,0,0,183.6145,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1302.39,553.37,0.0 +gfx950,256,3840,7168,4608,ck,0,0,183.8447,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1379.81,575.35,0.0 +gfx950,256,3632,7168,4608,ck,0,0,184.0852,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1303.37,553.19,0.0 +gfx950,256,3617,7168,4608,ck,0,0,184.1786,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1297.33,551.37,0.0 +gfx950,256,3655,7168,4608,ck,0,0,184.5089,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1308.61,554.28,0.0 +gfx950,256,3824,7168,4608,ck,0,0,185.7975,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1359.62,567.67,0.0 +gfx950,256,3749,7168,4608,ck,0,0,186.4223,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1328.49,558.15,0.0 +gfx950,256,3971,7168,4608,ck,0,0,188.7739,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1389.63,573.47,0.0 +gfx950,256,3831,7168,4608,ck,0,0,189.3449,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1336.59,557.74,0.0 +gfx950,256,3925,7168,4608,ck,0,0,189.7271,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1366.63,566.0,0.0 +gfx950,256,4050,7168,4608,ck,0,0,190.0997,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1407.39,577.35,0.0 +gfx950,256,4002,7168,4608,ck,0,0,190.181,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1390.11,572.32,0.0 +gfx950,256,4000,7168,4608,ck,0,0,190.4939,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1387.14,571.18,0.0 +gfx950,256,3894,7168,4608,ck,0,0,190.9686,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1347.02,559.24,0.0 +gfx950,256,4046,7168,4608,ck,0,0,193.1147,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1384.05,567.94,0.0 +gfx950,256,4176,7168,4096,ck,0,0,193.694,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1266.0,548.97,0.0 +gfx950,256,4123,7168,4096,ck,0,0,193.8406,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1248.98,543.51,0.0 +gfx950,256,4180,7168,4096,ck,0,0,194.499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1261.96,547.08,0.0 +gfx950,256,4294,7168,4096,ck,0,0,196.1113,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1285.72,553.29,0.0 +gfx950,256,4504,7168,4096,ck,0,0,196.534,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1345.7,571.8,0.0 +gfx950,256,4507,7168,4096,ck,0,0,197.7924,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1338.03,568.44,0.0 +gfx950,256,4522,7168,4096,ck,0,0,198.7084,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1336.29,567.21,0.0 +gfx950,256,4598,7168,4096,ck,0,0,198.795,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1358.16,574.01,0.0 +gfx950,256,4123,7168,4608,ck,0,0,202.8112,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1342.96,547.98,0.0 +gfx950,256,4180,7168,4608,ck,0,0,205.1085,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1346.27,547.11,0.0 +gfx950,256,4294,7168,4608,ck,0,0,207.1032,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1369.67,552.26,0.0 +gfx950,256,4176,7168,4608,ck,0,0,207.4071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1330.08,540.68,0.0 +gfx950,256,1968,9216,7168,ck,0,0,207.4619,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1253.31,561.26,0.0 +gfx950,256,1809,9216,7168,ck,0,0,207.7974,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1150.19,540.77,0.0 +gfx950,256,1817,9216,7168,ck,0,0,208.0519,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1153.86,541.09,0.0 +gfx950,256,1934,9216,7168,ck,0,0,208.1306,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1227.7,555.28,0.0 +gfx950,256,1808,9216,7168,ck,0,0,208.1696,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1147.5,539.68,0.0 +gfx950,256,1816,9216,7168,ck,0,0,208.2015,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1152.4,540.58,0.0 +gfx950,256,1980,9216,7168,ck,0,0,208.217,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1256.38,560.7,0.0 +gfx950,256,1987,9216,7168,ck,0,0,208.3071,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1260.27,561.32,0.0 +gfx950,256,2003,9216,7168,ck,0,0,208.4775,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1269.38,562.83,0.0 +gfx950,256,1821,9216,7168,ck,0,0,208.5407,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1153.69,540.32,0.0 +gfx950,256,2006,9216,7168,ck,0,0,208.5596,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1270.78,562.98,0.0 +gfx950,256,1944,9216,7168,ck,0,0,209.3967,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1226.58,553.14,0.0 +gfx950,256,4522,7168,4608,ck,0,0,209.4079,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1426.52,566.81,0.0 +gfx950,256,2000,9216,7168,ck,0,0,209.4564,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1261.56,559.83,0.0 +gfx950,256,1978,9216,7168,ck,0,0,209.4967,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1247.44,557.04,0.0 +gfx950,256,2009,9216,7168,ck,0,0,209.5154,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1266.88,560.77,0.0 +gfx950,256,2011,9216,7168,ck,0,0,209.7597,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1266.66,560.36,0.0 +gfx950,256,1991,9216,7168,ck,0,0,209.8252,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1253.67,557.75,0.0 +gfx950,256,2004,9216,7168,ck,0,0,209.9913,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1260.86,558.89,0.0 +gfx950,256,2002,9216,7168,ck,0,0,210.1899,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1258.41,558.12,0.0 +gfx950,256,4507,7168,4608,ck,0,0,210.5352,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1414.18,562.43,0.0 +gfx950,256,4504,7168,4608,ck,0,0,210.7556,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1411.76,561.57,0.0 +gfx950,256,4598,7168,4608,ck,0,0,210.9763,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1439.71,569.42,0.0 +gfx950,256,2012,9216,7168,ck,0,0,211.1831,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1258.75,556.71,0.0 +gfx950,256,2041,9216,7168,ck,0,0,211.5481,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1274.69,559.26,0.0 +gfx950,256,2037,9216,7168,ck,0,0,211.9699,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1269.66,557.66,0.0 +gfx950,256,2035,9216,7168,ck,0,0,212.0795,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1267.76,557.13,0.0 +gfx950,256,2032,9216,7168,ck,0,0,212.8779,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1261.14,554.68,0.0 +gfx950,256,2086,9216,7168,ck,0,0,217.2606,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1268.54,549.86,0.0 +gfx950,256,5000,7168,4096,ck,0,0,217.6072,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1349.23,558.44,0.0 +gfx950,256,2116,9216,7168,ck,0,0,217.9422,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1282.76,551.66,0.0 +gfx950,256,4898,7168,4096,ck,0,0,217.9825,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1319.43,548.85,0.0 +gfx950,256,2129,9216,7168,ck,0,0,218.2738,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1288.68,552.35,0.0 +gfx950,256,2090,9216,7168,ck,0,0,218.5275,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1263.6,547.14,0.0 +gfx950,256,2091,9216,7168,ck,0,0,218.6122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1263.72,547.04,0.0 +gfx950,256,2148,9216,7168,ck,0,0,218.6756,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1297.79,553.56,0.0 +gfx950,256,2127,9216,7168,ck,0,0,218.696,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1284.98,551.05,0.0 +gfx950,256,2112,9216,7168,ck,0,0,218.7835,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1275.41,549.07,0.0 +gfx950,256,2161,9216,7168,ck,0,0,218.81,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1304.84,554.74,0.0 +gfx950,256,5002,7168,4096,ck,0,0,218.9865,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1341.26,555.09,0.0 +gfx950,256,2088,9216,7168,ck,0,0,219.1563,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1258.77,545.33,0.0 +gfx950,256,2056,9216,7168,ck,0,0,219.2249,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1239.09,541.43,0.0 +gfx950,256,2128,9216,7168,ck,0,0,219.3088,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1281.99,549.62,0.0 +gfx950,256,5007,7168,4096,ck,0,0,219.3844,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1340.17,554.5,0.0 +gfx950,256,5059,7168,4096,ck,0,0,219.4247,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1353.84,558.77,0.0 +gfx950,256,5003,7168,4096,ck,0,0,219.4409,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1338.75,554.02,0.0 +gfx950,256,2085,9216,7168,ck,0,0,219.6634,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1254.06,543.72,0.0 +gfx950,256,2150,9216,7168,ck,0,0,219.7529,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1292.63,551.07,0.0 +gfx950,256,2149,9216,7168,ck,0,0,219.7533,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1292.03,550.96,0.0 +gfx950,256,2109,9216,7168,ck,0,0,219.9936,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1266.59,545.7,0.0 +gfx950,256,5006,7168,4096,ck,0,0,220.1645,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1335.15,552.45,0.0 +gfx950,256,2164,9216,7168,ck,0,0,220.2027,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1298.39,551.58,0.0 +gfx950,256,5004,7168,4096,ck,0,0,220.2518,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1334.09,552.07,0.0 +gfx950,256,2101,9216,7168,ck,0,0,220.3153,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1259.95,543.97,0.0 +gfx950,256,2163,9216,7168,ck,0,0,220.3182,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1297.11,551.17,0.0 +gfx950,256,2054,9216,7168,ck,0,0,220.6651,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1229.81,537.66,0.0 +gfx950,256,2160,9216,7168,ck,0,0,220.9246,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1291.75,549.31,0.0 +gfx950,256,2126,9216,7168,ck,0,0,220.974,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1271.14,545.25,0.0 +gfx950,256,2111,9216,7168,ck,0,0,221.0646,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1261.65,543.29,0.0 +gfx950,256,2166,9216,7168,ck,0,0,221.2551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1293.41,549.18,0.0 +gfx950,256,2087,9216,7168,ck,0,0,221.5924,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1244.34,539.22,0.0 +gfx950,256,5018,7168,4096,ck,0,0,222.0101,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1327.23,548.86,0.0 +gfx950,256,2146,9216,7168,ck,0,0,222.0956,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1276.62,544.8,0.0 +gfx950,256,2130,9216,7168,ck,0,0,222.3264,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1265.78,542.39,0.0 +gfx950,256,5126,7168,4096,ck,0,0,222.7434,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1351.33,555.99,0.0 +gfx950,256,5124,7168,4096,ck,0,0,225.0299,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1337.08,550.17,0.0 +gfx950,256,2189,9216,7168,ck,14,0,230.5736,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1254.32,529.54,0.0 +gfx950,256,4898,7168,4608,ck,0,0,231.2475,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1399.21,544.08,0.0 +gfx950,256,2216,9216,7168,ck,14,0,231.3894,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1265.31,530.66,0.0 +gfx950,256,2215,9216,7168,ck,14,0,231.4746,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1264.27,530.36,0.0 +gfx950,256,2185,9216,7168,ck,14,0,231.6539,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1246.18,526.63,0.0 +gfx950,256,2188,9216,7168,ck,14,0,231.7748,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1247.24,526.69,0.0 +gfx950,256,2207,9216,7168,ck,14,0,232.3235,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1255.1,527.54,0.0 +gfx950,256,2186,9216,7168,ck,14,0,232.3384,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1243.08,525.19,0.0 +gfx950,256,2226,9216,7168,ck,14,0,232.4408,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1265.27,529.36,0.0 +gfx950,256,5264,7168,4096,ck,0,0,232.5523,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1329.18,543.47,0.0 +gfx950,256,2217,9216,7168,ck,14,0,232.5799,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1259.4,528.06,0.0 +gfx950,256,5018,7168,4608,ck,0,0,232.7417,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1424.29,550.36,0.0 +gfx950,256,5007,7168,4608,ck,0,0,232.8042,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1420.78,549.31,0.0 +gfx950,256,2219,9216,7168,ck,14,0,232.8305,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1259.18,527.71,0.0 +gfx950,256,5003,7168,4608,ck,0,0,233.0757,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1417.99,548.35,0.0 +gfx950,256,5004,7168,4608,ck,0,0,233.2932,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1416.95,547.92,0.0 +gfx950,256,5002,7168,4608,ck,0,0,233.3618,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1415.97,547.6,0.0 +gfx950,256,5000,7168,4608,ck,0,0,233.4528,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1414.85,547.22,0.0 +gfx950,256,5006,7168,4608,ck,0,0,233.5539,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1415.94,547.47,0.0 +gfx950,256,5059,7168,4608,ck,0,0,234.1643,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1427.2,550.33,0.0 +gfx950,256,5276,7168,4096,ck,0,0,234.5676,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1320.76,539.75,0.0 +gfx950,256,5258,7168,4096,ck,0,0,235.036,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1313.63,537.26,0.0 +gfx950,256,2235,9216,7168,ck,14,0,235.1142,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1255.94,524.33,0.0 +gfx950,256,5273,7168,4096,ck,0,0,235.1464,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1316.76,538.18,0.0 +gfx950,256,5269,7168,4096,ck,0,0,235.5116,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1313.72,537.04,0.0 +gfx950,256,5285,7168,4096,ck,0,0,235.5462,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1317.52,538.21,0.0 +gfx950,256,5295,7168,4096,ck,0,0,235.9586,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1317.7,538.05,0.0 +gfx950,256,5277,7168,4096,ck,0,0,236.1238,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1312.31,536.27,0.0 +gfx950,256,5292,7168,4096,ck,0,0,236.4061,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1314.47,536.8,0.0 +gfx950,256,5272,7168,4096,ck,0,0,236.8339,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1307.13,534.27,0.0 +gfx950,256,5283,7168,4096,ck,0,0,237.1098,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1308.34,534.51,0.0 +gfx950,256,5251,7168,4096,ck,0,0,237.2367,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1299.71,531.73,0.0 +gfx950,256,5302,7168,4096,ck,0,0,237.2435,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1312.3,535.68,0.0 +gfx950,256,5368,7168,4096,ck,0,0,237.2762,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1328.45,540.73,0.0 +gfx950,256,5124,7168,4608,ck,0,0,237.6008,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1424.63,547.55,0.0 +gfx950,256,5281,7168,4096,ck,0,0,237.644,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1304.9,533.15,0.0 +gfx950,256,5275,7168,4096,ck,0,0,238.0852,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1301.0,531.7,0.0 +gfx950,256,5126,7168,4608,ck,0,0,238.5416,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1419.56,545.55,0.0 +gfx950,256,5489,7168,4096,ck,0,0,239.0588,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1348.27,546.03,0.0 +gfx950,256,5447,7168,4096,ck,0,0,239.3025,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1336.59,542.24,0.0 +gfx950,256,5431,7168,4096,ck,0,0,239.5891,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1331.07,540.36,0.0 +gfx950,256,5440,7168,4096,ck,0,0,239.8922,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1331.59,540.37,0.0 +gfx950,256,5540,7168,4096,ck,0,0,240.167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1354.52,547.42,0.0 +gfx950,256,2268,9216,7168,ck,0,0,240.9577,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1243.58,515.12,0.0 +gfx950,256,2252,9216,7168,ck,0,0,241.0209,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1234.48,513.28,0.0 +gfx950,256,2254,9216,7168,ck,0,0,241.2619,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1234.34,512.98,0.0 +gfx950,256,2304,9216,7168,ck,0,0,241.6271,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1259.82,517.5,0.0 +gfx950,256,2250,9216,7168,ck,0,0,241.9372,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1228.71,511.13,0.0 +gfx950,256,2248,9216,7168,ck,0,0,242.2063,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1226.26,510.35,0.0 +gfx950,256,5670,7168,4096,ck,0,0,242.4782,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1373.09,552.09,0.0 +gfx950,256,2301,9216,7168,ck,0,0,242.8625,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1251.78,514.55,0.0 +gfx950,256,2251,9216,7168,ck,0,0,242.8958,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1224.41,509.21,0.0 +gfx950,256,2282,9216,7168,ck,0,0,243.1132,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1240.16,512.02,0.0 +gfx950,256,2369,9216,7168,ck,0,0,243.3738,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1286.06,520.63,0.0 +gfx950,256,2344,9216,7168,ck,0,0,243.6108,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1271.25,517.49,0.0 +gfx950,256,2249,9216,7168,ck,0,0,244.4554,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1215.51,505.76,0.0 +gfx950,256,2368,9216,7168,ck,0,0,244.4693,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1279.76,518.19,0.0 +gfx950,256,2355,9216,7168,ck,0,0,244.4725,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1272.72,516.82,0.0 +gfx950,256,2378,9216,7168,ck,0,0,244.7485,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1283.7,518.64,0.0 +gfx950,256,2356,9216,7168,ck,0,0,244.8418,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1271.34,516.15,0.0 +gfx950,256,2365,9216,7168,ck,0,0,244.9708,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1275.52,516.81,0.0 +gfx950,256,2390,9216,7168,ck,0,0,245.0038,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1288.83,519.36,0.0 +gfx950,256,2412,9216,7168,ck,0,0,245.5688,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1297.7,520.45,0.0 +gfx950,256,2358,9216,7168,ck,0,0,245.655,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1268.2,514.64,0.0 +gfx950,256,2415,9216,7168,ck,0,0,245.6594,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1298.84,520.58,0.0 +gfx950,256,2403,9216,7168,ck,0,0,246.4106,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1288.44,517.74,0.0 +gfx950,256,2387,9216,7168,ck,0,0,246.4228,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1279.8,516.05,0.0 +gfx950,256,2445,9216,7168,ck,0,0,246.4821,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1310.58,521.95,0.0 +gfx950,256,2479,9216,7168,ck,0,0,247.9666,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1320.85,522.34,0.0 +gfx950,256,2464,9216,7168,ck,0,0,247.9928,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1312.72,520.74,0.0 +gfx950,256,2486,9216,7168,ck,0,0,248.0842,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1323.95,522.81,0.0 +gfx950,256,2534,9216,7168,ck,0,0,248.4384,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1347.59,527.01,0.0 +gfx950,256,2444,9216,7168,ck,0,0,248.5064,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1299.37,517.6,0.0 +gfx950,256,2441,9216,7168,ck,0,0,248.5748,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1297.42,517.15,0.0 +gfx950,256,2544,9216,7168,ck,0,0,249.0558,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1349.56,526.74,0.0 +gfx950,256,2443,9216,7168,ck,0,0,249.0646,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1295.93,516.34,0.0 +gfx950,256,2548,9216,7168,ck,0,0,249.0889,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1351.5,527.08,0.0 +gfx950,256,2515,9216,7168,ck,0,0,249.8447,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1329.96,522.1,0.0 +gfx950,256,5275,7168,4608,ck,0,0,250.2272,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1392.61,531.36,0.0 +gfx950,256,5295,7168,4608,ck,0,0,250.2766,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1397.61,532.77,0.0 +gfx950,256,5264,7168,4608,ck,0,0,250.3999,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1388.74,530.16,0.0 +gfx950,256,5251,7168,4608,ck,0,0,250.8508,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1382.82,528.22,0.0 +gfx950,256,5292,7168,4608,ck,0,0,250.8751,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1393.49,531.27,0.0 +gfx950,256,5302,7168,4608,ck,0,0,250.8828,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1396.08,532.01,0.0 +gfx950,256,5276,7168,4608,ck,0,0,251.2519,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1387.19,529.26,0.0 +gfx950,256,5368,7168,4608,ck,0,0,251.674,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1409.01,535.3,0.0 +gfx950,256,5283,7168,4608,ck,0,0,251.8314,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1385.83,528.57,0.0 +gfx950,256,5258,7168,4608,ck,0,0,251.8336,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1379.26,526.69,0.0 +gfx950,256,5272,7168,4608,ck,0,0,251.9343,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1382.38,527.53,0.0 +gfx950,256,5273,7168,4608,ck,0,0,252.0943,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1381.77,527.27,0.0 +gfx950,256,5269,7168,4608,ck,0,0,252.3084,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1379.55,526.52,0.0 +gfx950,256,5281,7168,4608,ck,0,0,252.3915,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1382.24,527.25,0.0 +gfx950,256,5285,7168,4608,ck,0,0,252.6477,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1381.88,527.02,0.0 +gfx950,256,5277,7168,4608,ck,0,0,253.0007,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1377.86,525.68,0.0 +gfx950,256,5431,7168,4608,ck,0,0,253.263,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1416.6,536.66,0.0 +gfx950,256,5489,7168,4608,ck,0,0,253.9814,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1427.68,539.46,0.0 +gfx950,256,5447,7168,4608,ck,0,0,254.2202,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1415.43,535.83,0.0 +gfx950,256,5440,7168,4608,ck,0,0,255.1173,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1408.64,533.42,0.0 +gfx950,256,2658,9216,7168,ck,0,0,256.0555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1371.49,523.73,0.0 +gfx950,256,2593,9216,7168,ck,0,0,256.1904,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1337.24,516.96,0.0 +gfx950,256,2570,9216,7168,ck,0,0,256.2854,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1324.89,514.47,0.0 +gfx950,256,2597,9216,7168,ck,0,0,256.4285,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1338.06,516.88,0.0 +gfx950,256,2594,9216,7168,ck,0,0,256.585,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1335.7,516.27,0.0 +gfx950,256,2567,9216,7168,ck,0,0,256.8749,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1320.31,512.99,0.0 +gfx950,256,5540,7168,4608,ck,0,0,257.2505,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1422.64,536.36,0.0 +gfx950,256,2636,9216,7168,ck,0,0,257.8539,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1350.65,517.9,0.0 +gfx950,256,5670,7168,4608,ck,0,0,259.9393,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1440.96,540.29,0.0 +gfx950,256,5890,7168,4096,ck,0,0,260.6838,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1326.75,529.09,0.0 +gfx950,256,6002,7168,4096,ck,0,0,260.6957,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1351.92,536.98,0.0 +gfx950,256,6001,7168,4096,ck,0,0,260.8845,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1350.71,536.52,0.0 +gfx950,256,6008,7168,4096,ck,0,0,261.5246,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1348.98,535.7,0.0 +gfx950,256,6108,7168,4096,ck,0,0,261.9001,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1369.47,541.97,0.0 +gfx950,256,6107,7168,4096,ck,0,0,262.7356,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1364.89,540.18,0.0 +gfx950,256,6026,7168,4096,ck,0,0,263.6662,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1342.03,532.61,0.0 +gfx950,256,6146,7168,4096,ck,0,0,264.3101,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1365.42,539.68,0.0 +gfx950,256,6225,7168,4096,ck,0,0,266.6027,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1371.08,540.5,0.0 +gfx950,256,6307,7168,4096,ck,0,0,267.272,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1385.66,544.8,0.0 +gfx950,256,6304,7168,4096,ck,0,0,267.6781,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1382.9,543.77,0.0 +gfx950,256,6002,7168,4608,ck,0,0,274.4139,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1444.88,534.71,0.0 +gfx950,256,5890,7168,4608,ck,0,0,274.7153,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1416.36,526.4,0.0 +gfx950,256,6008,7168,4608,ck,0,0,275.9823,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1438.1,532.08,0.0 +gfx950,256,6001,7168,4608,ck,0,0,275.9929,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1436.37,531.58,0.0 +gfx950,256,6026,7168,4608,ck,0,0,278.2775,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1430.51,528.92,0.0 +gfx950,256,6108,7168,4608,ck,0,0,278.9141,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1446.67,533.28,0.0 +gfx950,256,6476,7168,4096,ck,0,0,279.1636,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1362.18,532.75,0.0 +gfx950,256,6418,7168,4096,ck,0,0,279.7655,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1347.08,527.79,0.0 +gfx950,256,6107,7168,4608,ck,0,0,279.9261,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1441.2,531.29,0.0 +gfx950,256,6449,7168,4096,ck,0,0,281.1807,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1346.77,527.16,0.0 +gfx950,256,6583,7168,4096,ck,0,0,281.3915,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1373.73,535.55,0.0 +gfx950,256,6225,7168,4608,ck,0,0,282.4253,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1456.05,534.5,0.0 +gfx950,256,6146,7168,4608,ck,0,0,282.6555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1436.4,528.77,0.0 +gfx950,256,2794,9216,7168,ck,0,0,283.894,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1300.29,484.64,0.0 +gfx950,256,6307,7168,4608,ck,0,0,284.0242,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1466.93,536.96,0.0 +gfx950,256,2748,9216,7168,ck,0,0,284.2928,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1277.09,479.82,0.0 +gfx950,256,2757,9216,7168,ck,0,0,284.4086,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1280.75,480.43,0.0 +gfx950,256,2713,9216,7168,ck,0,0,284.6242,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1259.36,476.11,0.0 +gfx950,256,6304,7168,4608,ck,0,0,284.9726,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1461.35,534.97,0.0 +gfx950,256,2729,9216,7168,ck,0,0,285.0171,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1265.04,476.89,0.0 +gfx950,256,2747,9216,7168,ck,0,0,285.1415,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1272.82,478.3,0.0 +gfx950,256,2734,9216,7168,ck,0,0,285.4549,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1265.41,476.61,0.0 +gfx950,256,6815,7168,4096,ck,0,0,285.5846,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1401.26,542.66,0.0 +gfx950,256,2740,9216,7168,ck,0,0,285.9301,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1266.08,476.36,0.0 +gfx950,256,2876,9216,7168,ck,0,0,285.9696,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1328.74,488.46,0.0 +gfx950,256,6809,7168,4096,ck,0,0,285.979,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1398.1,541.52,0.0 +gfx950,256,2841,9216,7168,ck,0,0,286.2949,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1311.08,484.78,0.0 +gfx950,256,2843,9216,7168,ck,0,0,286.306,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1311.95,484.94,0.0 +gfx950,256,2861,9216,7168,ck,0,0,286.5411,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1319.17,486.15,0.0 +gfx950,256,6817,7168,4096,ck,0,0,286.5851,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1396.78,540.89,0.0 +gfx950,256,2889,9216,7168,ck,0,0,286.6482,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1331.58,488.47,0.0 +gfx950,256,2918,9216,7168,ck,0,0,286.8587,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1343.96,490.7,0.0 +gfx950,256,6813,7168,4096,ck,0,0,286.9278,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1394.29,539.99,0.0 +gfx950,256,6811,7168,4096,ck,0,0,287.0207,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1393.43,539.68,0.0 +gfx950,256,6814,7168,4096,ck,0,0,287.2381,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1392.99,539.47,0.0 +gfx950,256,6819,7168,4096,ck,0,0,287.4156,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1393.15,539.46,0.0 +gfx950,256,6812,7168,4096,ck,0,0,287.5426,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1391.11,538.77,0.0 +gfx950,256,6818,7168,4096,ck,0,0,288.2667,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1388.83,537.8,0.0 +gfx950,256,2947,9216,7168,ck,0,0,289.8048,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1343.52,488.27,0.0 +gfx950,256,2946,9216,7168,ck,0,0,290.0462,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1341.95,487.78,0.0 +gfx950,256,2955,9216,7168,ck,0,0,290.4563,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1344.15,487.88,0.0 +gfx950,256,3000,9216,7168,ck,0,0,290.9507,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1362.3,491.01,0.0 +gfx950,256,2952,9216,7168,ck,0,0,291.0039,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1340.26,486.7,0.0 +gfx950,256,2964,9216,7168,ck,0,0,291.5906,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1343.0,486.77,0.0 +gfx950,256,3004,9216,7168,ck,0,0,291.925,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1359.56,489.72,0.0 +gfx950,256,3019,9216,7168,ck,0,0,292.7736,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1362.39,489.62,0.0 +gfx950,256,3005,9216,7168,ck,0,0,295.022,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1345.74,484.67,0.0 +gfx950,256,6476,7168,4608,ck,0,0,298.4587,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1433.39,521.72,0.0 +gfx950,256,6418,7168,4608,ck,0,0,299.9363,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1413.55,515.49,0.0 +gfx950,256,6583,7168,4608,ck,0,0,300.4069,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1447.62,525.08,0.0 +gfx950,256,6814,7168,4608,ck,0,0,300.4743,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1498.08,539.53,0.0 +gfx950,256,6449,7168,4608,ck,0,0,300.742,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1416.57,516.06,0.0 +gfx950,256,7000,7168,4096,ck,0,0,301.2823,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1364.31,525.7,0.0 +gfx950,256,6818,7168,4608,ck,0,0,301.3546,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1494.58,538.2,0.0 +gfx950,256,7001,7168,4096,ck,0,0,301.8697,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1361.85,524.74,0.0 +gfx950,256,7009,7168,4096,ck,0,0,302.0272,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1362.69,524.95,0.0 +gfx950,256,6812,7168,4608,ck,0,0,304.3132,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1478.75,532.6,0.0 +gfx950,256,6813,7168,4608,ck,0,0,304.374,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1478.67,532.55,0.0 +gfx950,256,6811,7168,4608,ck,0,0,304.4964,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1477.64,532.22,0.0 +gfx950,256,7050,7168,4096,ck,0,0,304.6527,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1358.85,522.91,0.0 +gfx950,256,6809,7168,4608,ck,0,0,304.953,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1475.0,531.29,0.0 +gfx950,256,6815,7168,4608,ck,0,0,304.9784,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1476.17,531.62,0.0 +gfx950,256,6817,7168,4608,ck,0,0,305.5362,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1473.91,530.78,0.0 +gfx950,256,6819,7168,4608,ck,0,0,305.6783,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1473.66,530.65,0.0 +gfx950,256,7241,7168,4096,ck,0,0,306.5496,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1387.03,531.16,0.0 +gfx950,256,7289,7168,4096,ck,0,0,307.4328,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1392.21,532.51,0.0 +gfx950,256,7242,7168,4096,ck,0,0,307.5344,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1382.78,529.52,0.0 +gfx950,256,7238,7168,4096,ck,0,0,307.794,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1380.85,528.83,0.0 +gfx950,256,7536,7168,4096,ck,0,0,312.103,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1417.85,539.13,0.0 +gfx950,256,7539,7168,4096,ck,0,0,312.3545,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1417.27,538.87,0.0 +gfx950,256,7440,7168,4096,ck,0,0,312.5886,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1397.62,532.63,0.0 +gfx950,256,7009,7168,4608,ck,0,0,319.4419,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1449.45,519.06,0.0 +gfx950,256,3258,9216,7168,ck,0,0,319.8829,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1345.65,467.25,0.0 +gfx950,256,7000,7168,4608,ck,0,0,320.021,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1444.97,517.59,0.0 +gfx950,256,7001,7168,4608,ck,0,0,321.0978,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1440.33,515.91,0.0 +gfx950,256,7050,7168,4608,ck,0,0,321.4955,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1448.62,518.16,0.0 +gfx950,256,3451,9216,7168,ck,0,0,324.4159,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1405.44,475.95,0.0 +gfx950,256,7238,7168,4608,ck,0,0,325.0163,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1471.14,523.5,0.0 +gfx950,256,7241,7168,4608,ck,0,0,325.1871,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1470.98,523.4,0.0 +gfx950,256,7656,7168,4096,ck,0,0,325.2187,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1382.34,524.19,0.0 +gfx950,256,7242,7168,4608,ck,0,0,327.5243,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1460.68,519.73,0.0 +gfx950,256,7289,7168,4608,ck,0,0,328.0345,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1467.87,521.63,0.0 +gfx950,256,7809,7168,4096,ck,0,0,328.494,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1395.91,527.55,0.0 +gfx950,256,8018,7168,4096,ck,0,0,329.5899,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1428.5,537.48,0.0 +gfx950,256,8008,7168,4096,ck,0,0,330.1886,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1424.13,535.95,0.0 +gfx950,256,7871,7168,4096,ck,0,0,330.3587,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1399.05,528.03,0.0 +gfx950,256,8001,7168,4096,ck,0,0,330.5063,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1421.52,535.04,0.0 +gfx950,256,7990,7168,4096,ck,0,0,330.551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1419.37,534.36,0.0 +gfx950,256,8015,7168,4096,ck,0,0,331.1775,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1421.12,534.74,0.0 +gfx950,256,8040,7168,4096,ck,0,0,331.2802,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1425.11,535.96,0.0 +gfx950,256,8048,7168,4096,ck,0,0,331.4648,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1425.73,536.11,0.0 +gfx950,256,8010,7168,4096,ck,0,0,332.4519,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1414.79,532.41,0.0 +gfx950,256,7536,7168,4608,ck,0,0,332.5684,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1496.93,528.59,0.0 +gfx950,256,8007,7168,4096,ck,0,0,332.6879,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1413.26,531.87,0.0 +gfx950,256,7440,7168,4608,ck,0,0,332.8316,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1476.69,522.71,0.0 +gfx950,256,7539,7168,4608,ck,0,0,332.8707,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1496.16,528.28,0.0 +gfx950,256,8047,7168,4096,ck,0,0,333.2304,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1418.0,533.21,0.0 +gfx950,256,8051,7168,4096,ck,0,0,333.7155,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1416.65,532.66,0.0 +gfx950,256,8043,7168,4096,ck,0,0,334.587,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1411.55,530.83,0.0 +gfx950,256,8090,7168,4096,ck,0,0,335.2167,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1417.13,532.42,0.0 +gfx950,256,8182,7168,4096,ck,0,0,335.6459,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1431.42,536.79,0.0 +gfx950,256,7656,7168,4608,ck,0,0,344.4162,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1468.45,517.01,0.0 +gfx950,256,8018,7168,4608,ck,0,0,348.1248,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1521.5,531.2,0.0 +gfx950,256,7809,7168,4608,ck,0,0,349.0305,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1477.99,518.48,0.0 +gfx950,256,8001,7168,4608,ck,0,0,351.2698,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1504.68,525.53,0.0 +gfx950,256,8007,7168,4608,ck,0,0,351.4314,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1505.12,525.61,0.0 +gfx950,256,7871,7168,4608,ck,0,0,351.8356,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1477.85,517.68,0.0 +gfx950,256,7990,7168,4608,ck,0,0,352.2187,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1498.56,523.52,0.0 +gfx950,256,8015,7168,4608,ck,0,0,352.4466,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1502.28,524.52,0.0 +gfx950,256,8043,7168,4608,ck,0,0,352.4643,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1507.45,526.0,0.0 +gfx950,256,8182,7168,4608,ck,0,0,352.6091,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1532.87,533.25,0.0 +gfx950,256,8051,7168,4608,ck,0,0,352.636,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1508.22,526.18,0.0 +gfx950,256,8040,7168,4608,ck,0,0,352.7171,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1505.81,525.46,0.0 +gfx950,256,8010,7168,4608,ck,0,0,352.9633,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1499.14,523.49,0.0 +gfx950,256,8008,7168,4608,ck,0,0,353.1514,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1497.97,523.1,0.0 +gfx950,256,8047,7168,4608,ck,0,0,353.75,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1502.72,524.3,0.0 +gfx950,256,8048,7168,4608,ck,0,0,354.3782,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1500.24,523.43,0.0 +gfx950,256,8090,7168,4608,ck,0,0,354.9997,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1505.43,524.75,0.0 +gfx950,256,3620,9216,7168,ck,0,0,358.1535,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1335.4,443.2,0.0 +gfx950,256,3587,9216,7168,ck,0,0,358.3998,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1322.31,440.53,0.0 +gfx950,256,3617,9216,7168,ck,0,0,359.2198,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1330.33,441.67,0.0 +gfx950,256,3632,9216,7168,ck,0,0,359.292,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1335.58,442.65,0.0 +gfx950,256,3655,9216,7168,ck,0,0,361.7026,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1335.08,441.32,0.0 +gfx950,256,3840,9216,7168,ck,0,0,365.6039,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1387.68,449.57,0.0 +gfx950,256,3749,9216,7168,ck,0,0,366.1114,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1352.92,442.58,0.0 +gfx950,256,3824,9216,7168,ck,0,0,366.6245,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1378.06,447.2,0.0 +gfx950,256,3831,9216,7168,ck,0,0,370.2031,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1367.23,443.36,0.0 +gfx950,256,3894,9216,7168,ck,0,0,371.0755,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1386.45,446.67,0.0 +gfx950,256,3925,9216,7168,ck,0,0,372.5836,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1391.83,446.99,0.0 +gfx950,256,4000,9216,7168,ck,0,0,373.2503,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1415.89,451.33,0.0 +gfx950,256,4002,9216,7168,ck,0,0,378.8347,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1395.72,444.82,0.0 +gfx950,256,3971,9216,7168,ck,0,0,379.2322,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1383.46,442.26,0.0 +gfx950,256,4050,9216,7168,ck,0,0,382.2395,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1399.88,444.07,0.0 +gfx950,256,4046,9216,7168,ck,0,0,385.5733,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1386.4,439.96,0.0 +gfx950,256,4180,9216,7168,ck,0,0,401.9307,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1374.03,430.59,0.0 +gfx950,256,4123,9216,7168,ck,0,0,402.1989,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1354.39,426.68,0.0 +gfx950,256,4176,9216,7168,ck,0,0,403.394,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1367.73,428.78,0.0 +gfx950,256,4294,9216,7168,ck,0,0,407.1645,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1393.36,432.23,0.0 +gfx950,256,4522,9216,7168,ck,14,0,426.31,a8w8_blockscale_1x128x128_256x64x256x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,1401.44,426.51,0.0 +gfx950,256,4507,9216,7168,ck,0,0,426.9688,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1394.64,424.95,0.0 +gfx950,256,4504,9216,7168,ck,0,0,428.0664,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1390.14,423.68,0.0 +gfx950,256,4598,9216,7168,ck,0,0,430.6589,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1410.61,426.72,0.0 +gfx950,256,4898,9216,7168,ck,0,0,445.0601,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1454.02,430.16,0.0 +gfx950,256,5004,9216,7168,ck,0,0,462.157,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1430.53,420.12,0.0 +gfx950,256,5059,9216,7168,ck,0,0,465.7833,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1435.0,419.87,0.0 +gfx950,256,5003,9216,7168,ck,0,0,465.8192,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1419.0,416.76,0.0 +gfx950,256,5002,9216,7168,ck,0,0,467.0724,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1414.91,415.59,0.0 +gfx950,256,5018,9216,7168,ck,0,0,467.1755,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1419.13,416.38,0.0 +gfx950,256,5000,9216,7168,ck,0,0,467.4705,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1413.14,415.13,0.0 +gfx950,256,5006,9216,7168,ck,0,0,467.7881,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1413.88,415.17,0.0 +gfx950,256,5007,9216,7168,ck,0,0,469.4578,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1409.13,413.75,0.0 +gfx950,256,5124,9216,7168,ck,0,0,470.5382,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1438.75,419.17,0.0 +gfx950,256,5126,9216,7168,ck,0,0,471.0363,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1437.79,418.83,0.0 +gfx950,256,5272,9216,7168,ck,0,0,472.2956,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1474.8,425.63,0.0 +gfx950,256,5258,9216,7168,ck,0,0,473.3658,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1467.55,423.91,0.0 +gfx950,256,5368,9216,7168,ck,0,0,474.5745,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1494.44,428.77,0.0 +gfx950,256,5264,9216,7168,ck,0,0,474.7605,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1464.91,422.99,0.0 +gfx950,256,5281,9216,7168,ck,0,0,475.3455,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1467.83,423.38,0.0 +gfx950,256,5285,9216,7168,ck,0,0,475.4294,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1468.69,423.53,0.0 +gfx950,256,5251,9216,7168,ck,0,0,475.4551,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1459.16,421.67,0.0 +gfx950,256,5269,9216,7168,ck,0,0,475.752,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1463.25,422.38,0.0 +gfx950,256,5283,9216,7168,ck,0,0,476.8096,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1463.88,422.19,0.0 +gfx950,256,5302,9216,7168,ck,0,0,476.8694,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1468.96,423.16,0.0 +gfx950,256,5292,9216,7168,ck,0,0,476.9702,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1465.88,422.53,0.0 +gfx950,256,5273,9216,7168,ck,0,0,477.4273,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1459.22,421.11,0.0 +gfx950,256,5295,9216,7168,ck,0,0,477.6059,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1464.76,422.13,0.0 +gfx950,256,5276,9216,7168,ck,0,0,477.981,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1458.36,420.78,0.0 +gfx950,256,5277,9216,7168,ck,0,0,478.6613,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1456.56,420.24,0.0 +gfx950,256,5275,9216,7168,ck,0,0,478.6729,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1455.98,420.12,0.0 +gfx950,256,5540,9216,7168,ck,0,0,498.4332,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1468.5,417.08,0.0 +gfx950,256,5440,9216,7168,ck,0,0,504.4976,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1424.66,406.99,0.0 +gfx950,256,5489,9216,7168,ck,0,0,506.8569,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1430.8,407.57,0.0 +gfx950,256,5431,9216,7168,ck,0,0,507.7065,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1413.31,403.96,0.0 +gfx950,256,5447,9216,7168,ck,0,0,508.9571,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1413.99,403.77,0.0 +gfx950,256,5670,9216,7168,ck,0,0,511.1871,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1465.46,413.18,0.0 +gfx950,256,6008,9216,7168,ck,0,0,540.4309,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1468.79,406.83,0.0 +gfx950,256,6002,9216,7168,ck,0,0,541.0366,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1465.68,406.09,0.0 +gfx950,256,6001,9216,7168,ck,0,0,541.0555,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1465.39,406.03,0.0 +gfx950,256,5890,9216,7168,ck,0,0,543.501,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1431.81,398.98,0.0 +gfx950,256,6108,9216,7168,ck,0,0,546.6226,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1476.32,406.91,0.0 +gfx950,256,6026,9216,7168,ck,0,0,547.5947,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1453.92,402.35,0.0 +gfx950,256,6107,9216,7168,ck,0,0,547.8973,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1472.65,405.91,0.0 +gfx950,256,6225,9216,7168,ck,0,0,557.8282,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1474.38,404.1,0.0 +gfx950,256,6146,9216,7168,ck,0,0,558.1346,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1454.87,400.26,0.0 +gfx950,256,6304,9216,7168,ck,0,0,569.5471,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1462.37,399.34,0.0 +gfx950,256,6307,9216,7168,ck,0,0,571.9375,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1456.95,397.8,0.0 +gfx950,256,6449,9216,7168,ck,0,0,579.0958,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1471.34,399.16,0.0 +gfx950,256,6418,9216,7168,ck,0,0,581.2605,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1458.81,396.31,0.0 +gfx950,256,6476,9216,7168,ck,0,0,581.52,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1471.34,398.69,0.0 +gfx950,256,6583,9216,7168,ck,0,0,582.5054,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1493.12,402.72,0.0 +gfx950,256,6809,9216,7168,ck,0,0,608.7679,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1477.75,394.85,0.0 +gfx950,256,6811,9216,7168,ck,0,0,610.1569,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1474.82,394.03,0.0 +gfx950,256,6814,9216,7168,ck,0,0,610.9253,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1473.62,393.66,0.0 +gfx950,256,6813,9216,7168,ck,0,0,613.1513,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1468.05,392.19,0.0 +gfx950,256,6817,9216,7168,ck,0,0,613.7094,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1467.58,392.0,0.0 +gfx950,256,6818,9216,7168,ck,0,0,613.9091,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1467.32,391.92,0.0 +gfx950,256,6812,9216,7168,ck,0,0,614.4265,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1464.79,391.34,0.0 +gfx950,256,6819,9216,7168,ck,0,0,617.3259,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1459.41,389.79,0.0 +gfx950,256,7000,9216,7168,ck,0,0,617.4862,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1497.76,397.19,0.0 +gfx950,256,6815,9216,7168,ck,0,0,618.3965,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1456.03,388.95,0.0 +gfx950,256,7050,9216,7168,ck,0,0,621.3622,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1499.05,396.77,0.0 +gfx950,256,7001,9216,7168,ck,0,0,622.4355,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1486.06,394.07,0.0 +gfx950,256,7009,9216,7168,ck,0,0,622.6508,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1487.24,394.27,0.0 +gfx950,256,7241,9216,7168,ck,0,0,638.0275,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1499.44,394.07,0.0 +gfx950,256,7242,9216,7168,ck,0,0,639.3471,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1496.55,393.3,0.0 +gfx950,256,7238,9216,7168,ck,0,0,646.4276,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1479.34,388.83,0.0 +gfx950,256,7440,9216,7168,ck,0,0,651.7981,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1508.1,393.56,0.0 +gfx950,256,7289,9216,7168,ck,0,0,652.1499,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1476.7,387.42,0.0 +gfx950,256,7536,9216,7168,ck,0,0,658.1329,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1512.86,393.51,0.0 +gfx950,256,7539,9216,7168,ck,0,0,659.0081,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1511.45,393.1,0.0 +gfx950,256,7656,9216,7168,ck,0,0,669.935,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1509.87,391.16,0.0 +gfx950,256,7809,9216,7168,ck,0,0,694.9813,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1484.54,382.7,0.0 +gfx950,256,7871,9216,7168,ck,0,0,696.4046,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1493.27,384.2,0.0 +gfx950,256,7990,9216,7168,ck,0,0,701.9611,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1503.85,385.5,0.0 +gfx950,256,8040,9216,7168,ck,0,0,705.723,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1505.19,385.26,0.0 +gfx950,256,8007,9216,7168,ck,0,0,706.2417,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1497.91,383.78,0.0 +gfx950,256,8182,9216,7168,ck,0,0,706.3281,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1530.47,390.07,0.0 +gfx950,256,8010,9216,7168,ck,0,0,706.4507,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1498.03,383.77,0.0 +gfx950,256,8043,9216,7168,ck,0,0,706.599,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1503.89,384.89,0.0 +gfx950,256,8015,9216,7168,ck,0,0,707.4913,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1496.76,383.39,0.0 +gfx950,256,8018,9216,7168,ck,0,0,707.871,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1496.52,383.29,0.0 +gfx950,256,8001,9216,7168,ck,0,0,708.2183,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1492.61,382.49,0.0 +gfx950,256,8090,9216,7168,ck,0,0,708.419,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1508.79,385.6,0.0 +gfx950,256,8008,9216,7168,ck,0,0,709.7609,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1490.67,381.91,0.0 +gfx950,256,8048,9216,7168,ck,0,0,710.4124,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1496.75,383.0,0.0 +gfx950,256,8047,9216,7168,ck,0,0,710.9983,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1495.33,382.65,0.0 +gfx950,256,8051,9216,7168,ck,0,0,712.0659,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_2x2_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1493.83,382.22,0.0 diff --git a/aiter/configs/model_configs/glm47_fp8_tuned_fmoe.csv b/aiter/configs/model_configs/glm47_fp8_tuned_fmoe.csv new file mode 100644 index 0000000000..274c78f663 --- /dev/null +++ b/aiter/configs/model_configs/glm47_fp8_tuned_fmoe.csv @@ -0,0 +1,17 @@ +cu_num,token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,q_type,use_g1u1,doweight_stage1,block_m,ksplit,us1,kernelName1,err1,us2,kernelName2,err2,us,run_1stage,tflops,bw,_tag +256,1,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,24.9944,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.00%,20.3787,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.80%,45.3731,0,8.32,20799.41 +256,2,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,40.11,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.00%,28.5666,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.80%,68.6766,0,10.99,13741.93 +256,4,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,99.8134,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.00%,0,,0.00%,99.8134,1,15.13,9455.44 +256,8,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,134.9101,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.00%,0,,0.00%,134.9101,1,22.38,6996.08 +256,16,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,156.6731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.00%,0,,0.00%,156.6731,1,38.55,6025.06 +256,32,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,164.2209,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.00%,0,,0.00%,164.2209,1,73.56,5749.63 +256,64,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,168.1699,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.00%,0,,0.00%,168.1699,1,143.66,5617.54 +256,128,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,177.1016,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x384E,0.00%,0,,0.00%,177.1016,1,272.83,5339.79 +256,256,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,64,0,111.1445,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_64x128_2tg_pf3E,0.00%,94.3821,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.80%,205.5266,0,470.19,4610.84 +256,512,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,128.617,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.00%,136.7373,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.80%,265.3543,0,728.36,3586.08 +256,1024,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,64,0,227.447,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.00%,192.093,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.80%,419.54,0,921.36,2286.9 +256,2048,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,352.8768,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.00%,345.4655,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.70%,698.3423,0,1107.04,1396.42 +256,4096,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,582.0723,moe_ck2stages_gemm1_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.00%,641.9346,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.80%,1224.0069,0,1263.22,822.41 +256,8192,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,1078.8184,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.00%,1239.192,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.80%,2318.0104,0,1334.06,461.41 +256,16384,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,1988.7154,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.00%,2328.3393,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.80%,4317.0547,0,1432.63,276.9 +256,32768,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,3930.1981,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.00%,4488.9205,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.80%,8419.1186,0,1469.22,171.87 diff --git a/aiter/configs/model_configs/glm47_fp8_untuned_fmoe.csv b/aiter/configs/model_configs/glm47_fp8_untuned_fmoe.csv new file mode 100644 index 0000000000..9f72b923bd --- /dev/null +++ b/aiter/configs/model_configs/glm47_fp8_untuned_fmoe.csv @@ -0,0 +1,17 @@ +token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,q_type,use_g1u1,doweight_stage1 +1,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +2,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +4,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +8,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +16,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +32,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +64,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +128,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +256,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +512,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +1024,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +2048,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +4096,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +8192,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +16384,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 +32768,5120,1536,40,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0 diff --git a/aiter/configs/model_configs/kimik2_fp4_tuned_fmoe.csv b/aiter/configs/model_configs/kimik2_fp4_tuned_fmoe.csv index 37393d10f6..f58fcd1238 100644 --- a/aiter/configs/model_configs/kimik2_fp4_tuned_fmoe.csv +++ b/aiter/configs/model_configs/kimik2_fp4_tuned_fmoe.csv @@ -1,129 +1,161 @@ cu_num,token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,q_type,use_g1u1,doweight_stage1,block_m,ksplit,us1,kernelName1,err1,us2,kernelName2,err2,us,run_1stage,tflops,bw,_tag -256,1,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,13.9799,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb14_xcd4_fp4,22.5%,8.1319,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,1.5%,22.1118,0,3.98,95602.83, -256,2,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,17.2713,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb14_bnt0_fp4,20.6%,10.1607,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,1.4%,27.432,0,6.42,77062.27, -256,4,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,22.6984,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb4_bnt0_fp4,20.4%,13.2528,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic,1.3%,35.9512,0,9.8,58802.36, -256,8,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,30.0262,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_bnt0,0.0%,17.6643,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic,1.3%,47.6905,0,14.78,44329.61, -256,16,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,39.0069,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4,0.0%,25.6226,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.3%,64.6295,0,21.81,32713.75, -256,32,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,63.889300000000006,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4,0.0%,37.0933,flydsl_moe2_afp4_wfp4_bf16_t16x256x256_atomic_bnt2_persist_sbm32,1.3%,100.9826,0,27.91,20940.41, -256,64,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,93.5036,flydsl_moe1_afp4_wfp4_bf16_t32x32x256_w3,0.0%,53.9899,flydsl_moe2_afp4_wfp4_bf16_t16x256x256_atomic_bnt2_sbm32,1.2%,147.4935,0,38.22,14341.69, -256,128,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,108.3724,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_fp4,17.2%,66.6968,flydsl_moe2_afp4_wfp4_bf16_t16x256x256_atomic_bnt2_sbm32,1.3%,175.0692,0,64.4,12090.54, -256,256,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,116.6103,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_fp4,17.1%,74.0789,flydsl_moe2_afp4_wfp4_bf16_t16x256x256_atomic_bnt2_persist_sbm32,1.2%,190.6892,0,118.25,11114.6, -256,512,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,117.5849,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.0%,80.9526,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.2%,198.5375,0,227.15,10702.96, -256,1024,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,121.0563,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.4%,106.3806,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2_sbm64,1.3%,227.4369,0,396.57,9391.39, -256,2048,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,144.0412,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w4,0.0%,163.2069,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_sbm128,0.1%,307.2481,0,587.11,7023.54, -256,4096,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,158.9084,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w3,0.0%,270.351,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.1%,429.2594,0,840.46,5129.79, -256,8192,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,252.1561,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w4_bnt0_xcd4,0.0%,463.9421,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_sbm128,0.1%,716.0982,0,1007.62,3198.01, -256,16384,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,397.2011,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w2_bnt0_xcd4,0.0%,924.8895,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.1%,1322.0906,0,1091.54,1865.42, -256,32768,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,673.6421,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w3_bnt0_xcd4,0.0%,1783.0066,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.1%,2456.6487,0,1174.86,1147.32, -256,1,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,17.9168,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb14_fp4,17.7%,7.5463,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,2.4%,25.4631,0,6.92,166039.48, -256,2,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,22.5887,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb2_go_fp4,18.7%,12.9583,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,2.9%,35.547,0,9.91,118938.35, -256,4,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,28.125300000000003,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3,0.0%,17.5122,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.5%,45.6375,0,15.44,92641.89, -256,8,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,40.4268,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_fp4,16.7%,27.7099,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_xcd4,2.7%,68.1367,0,20.68,62052.18, -256,16,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,67.7149,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_fp4,17.4%,40.7525,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.7%,108.4674,0,25.99,38981.32, -256,32,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,131.41219999999998,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4,0.0%,64.6228,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.8%,196.035,0,28.76,21570.37, -256,64,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,174.3146,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3,0.0%,92.6896,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,2.8%,267.0042,0,42.23,15839.58, -256,128,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,211.3136,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.5%,115.4686,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,2.8%,326.7822,0,69.0,12946.27, -256,256,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,227.1786,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_fp4,17.5%,128.1656,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,2.8%,355.3442,0,126.91,11913.42, -256,512,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,230.7154,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.2%,134.2382,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,2.8%,364.9536,0,247.14,11614.81, -256,1024,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,237.3056,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.3%,152.7537,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_sbm64,2.7%,390.0593,0,462.46,10895.47, -256,2048,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,248.2288,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.3%,215.1947,flydsl_moe2_afp4_wfp4_bf16_t64x128x256_atomic_bnt2_persist,2.8%,463.4235,0,778.5,9218.13, -256,4096,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,309.7988,flydsl_moe1_afp4_wfp4_bf16_t128x128x256,0.0%,382.4107,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4_persist_sbm128,0.4%,692.2095,0,1042.39,6235.02, -256,8192,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,492.2694,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w4_xcd4,0.0%,633.2934,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.4%,1125.5628,0,1282.12,3912.73, -256,16384,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,795.5577000000001,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w2_bnt0_xcd4,0.0%,1192.7816,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4_persist,0.4%,1988.3393,0,1451.57,2303.52, -256,32768,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,1276.6282,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0%,2259.8079,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4_persist_sbm128,0.4%,3536.4361,0,1632.27,1394.77, -256,1,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,17.8886,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb4_go_fp4,20.8%,7.5925,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,2.9%,25.4811,0,7.78,166354.28, -256,2,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,22.8149,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb7_xcd4_fp4,21.0%,13.1788,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,2.9%,35.9937,0,11.01,117768.15, -256,4,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,27.7235,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_fp4,19.7%,19.328,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic,2.9%,47.0515,0,16.85,90091.8, -256,8,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,51.6825,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4_fp4,0.0%,29.1534,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.7%,80.8359,0,19.61,52440.07, -256,16,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,78.1272,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3,0.0%,44.6234,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.9%,122.7506,0,25.83,34535.17, -256,32,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,132.1537,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4_fp4,17.8%,67.5444,flydsl_moe2_afp4_wfp4_bf16_t16x256x256_atomic_bnt2_persist_sbm32,3.0%,199.6981,0,31.76,21229.83, -256,64,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,184.367,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.2%,101.1521,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,2.9%,285.5191,0,44.42,14851.0, -256,128,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,219.1026,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w2_fp4,17.3%,120.1837,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,3.0%,339.2863,0,74.77,12501.6, -256,256,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,228.2939,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.3%,132.8644,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_sbm64,2.9%,361.1583,0,140.48,11752.11, -256,512,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,248.5047,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4,0.0%,135.3295,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,3.0%,383.8342,0,264.36,11072.17, -256,1024,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,250.3871,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w2_fp4,17.2%,157.9713,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_sbm64,2.9%,408.3584,0,496.96,10434.19, -256,2048,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,265.6454,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w2_fp4,17.3%,229.7372,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist,0.3%,495.3826,0,819.32,8645.66, -256,4096,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,335.2709,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w4,0.0%,397.6695,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.3%,732.9404,0,1107.52,5903.55, -256,8192,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,544.8151,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w2_xcd4,0.0%,707.078,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_sbm128,0.3%,1251.8931,0,1296.83,3526.68, -256,16384,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,862.1587,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w4_bnt0_xcd4,0.0%,1305.102,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4,0.3%,2167.2607,0,1498.2,2118.43, -256,32768,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,1391.8997,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0%,2564.68,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4_persist_sbm128,0.3%,3956.5797,0,1641.31,1249.44, -256,1,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,13.9802,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb14_fp4,13.9%,7.9113,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,1.4%,21.8915,0,4.53,96816.38, -256,2,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,17.2185,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb4_bnt0_go_fp4,15.8%,8.5243,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,1.2%,25.7428,0,7.7,82332.82, -256,4,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,23.2988,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb7_bnt0_fp4,17.3%,13.6377,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic,1.3%,36.9365,0,10.73,57382.81, -256,8,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,29.0089,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w4_fp4,18.4%,21.204,flydsl_moe2_afp4_wfp4_bf16_t64x128x256_atomic,1.1%,50.2129,0,15.79,42212.39, -256,16,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,43.3489,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w2,0.0%,26.9591,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,1.1%,70.308,0,22.55,30149.89, -256,32,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,66.8905,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2,0.0%,38.8442,flydsl_moe2_afp4_wfp4_bf16_t16x256x256_atomic_bnt2_persist_sbm32,1.3%,105.7347,0,29.99,20051.34, -256,64,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,101.211,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w4,0.0%,56.3305,flydsl_moe2_afp4_wfp4_bf16_t16x256x256_atomic_bnt2_sbm32,1.3%,157.5415,0,40.25,13461.92, -256,128,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,109.2599,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_fp4,17.2%,68.2535,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,1.3%,177.5134,0,71.45,11955.08, -256,256,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,115.5656,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_fp4,17.2%,74.744,flydsl_moe2_afp4_wfp4_bf16_t16x256x256_atomic_bnt2_persist_sbm32,1.4%,190.3096,0,133.29,11165.7, -256,512,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,130.7075,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_fp4,17.4%,82.6095,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2_persist,1.3%,213.317,0,237.84,9987.22, -256,1024,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,135.4983,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w4_fp4,17.4%,119.8506,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_persist_sbm64,1.3%,255.3489,0,397.37,8386.39, -256,2048,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,144.584,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w2_fp4,17.1%,171.2946,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce,0.1%,315.8786,0,642.45,6849.07, -256,4096,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,179.2193,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w2,0.0%,279.7423,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.1%,458.9616,0,884.33,4809.8, -256,8192,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,284.782,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w4_bnt0_xcd4,0.0%,519.6888,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.1%,804.4708,0,1009.05,2853.55, -256,16384,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,436.7742,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w2_bnt0_xcd4,0.0%,1024.2602,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_sbm128,0.1%,1461.0344,0,1111.2,1691.78, -256,32768,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,737.0636,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_bnt0_xcd4,0.0%,2046.4027,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_sbm128,0.1%,2783.4663,0,1166.53,1014.59, -256,1,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,25.1008,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,8.1319,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0146,33.2327,0,0.0,0.0,flydsl_fallback -256,2,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,25.9981,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,10.1607,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0142,36.1588,0,0.0,0.0,flydsl_fallback -256,4,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,26.9679,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,16.0298,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0121,42.9977,0,0.0,0.0,flydsl_fallback -256,8,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,31.3996,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,19.5128,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0127,50.9124,0,0.0,0.0,flydsl_fallback -256,16,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,44.2564,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,27.9592,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0125,72.2156,0,0.0,0.0,flydsl_fallback -256,32,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,71.8968,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,41.4555,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0131,113.3523,0,0.0,0.0,flydsl_fallback -256,64,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,104.9668,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,58.3475,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0126,163.3143,0,0.0,0.0,flydsl_fallback -256,128,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,126.3709,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,72.5589,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0128,198.9298,0,0.0,0.0,flydsl_fallback -256,256,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,135.34449999999998,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,78.3594,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0124,213.7039,0,0.0,0.0,flydsl_fallback -256,512,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,136.5402,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,89.3539,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0126,225.8941,0,0.0,0.0,flydsl_fallback -256,1024,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,144.8236,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,115.9855,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0126,260.8091,0,0.0,0.0,flydsl_fallback -256,2048,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,162.96370000000002,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,213.5719,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0125,376.5356,0,0.0,0.0,flydsl_fallback -256,4096,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,169.8057,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,403.5172,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0126,573.3229,0,0.0,0.0,flydsl_fallback -256,8192,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,304.2362,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,764.3293,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0125,1068.5655,0,0.0,0.0,flydsl_fallback -256,16384,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,498.0106,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,1476.716,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0125,1974.7266,0,0.0,0.0,flydsl_fallback -256,32768,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,769.9171,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,3004.7704,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0125,3774.6875,0,0.0,0.0,flydsl_fallback -256,1,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,25.6052,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,7.5463,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0239,33.1515,0,0.0,0.0,flydsl_fallback -256,2,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,26.6499,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,15.3309,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0269,41.9808,0,0.0,0.0,flydsl_fallback -256,4,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,29.7861,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,19.304,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0256,49.0901,0,0.0,0.0,flydsl_fallback -256,8,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,47.689800000000005,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,28.816,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0267,76.5058,0,0.0,0.0,flydsl_fallback -256,16,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,78.2038,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,42.647,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0277,120.8508,0,0.0,0.0,flydsl_fallback -256,32,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,141.0361,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,70.653,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0281,211.6891,0,0.0,0.0,flydsl_fallback -256,64,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,199.124,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,99.0852,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0281,298.2092,0,0.0,0.0,flydsl_fallback -256,128,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,244.4832,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,123.0286,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0278,367.5118,0,0.0,0.0,flydsl_fallback -256,256,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,258.7976,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,143.752,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0276,402.5496,0,0.0,0.0,flydsl_fallback -256,512,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,267.3028,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,149.5863,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0274,416.8891,0,0.0,0.0,flydsl_fallback -256,1024,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,280.9852,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,182.9953,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0274,463.9805,0,0.0,0.0,flydsl_fallback -256,2048,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,347.36620000000005,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,270.1472,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0276,617.5134,0,0.0,0.0,flydsl_fallback -256,4096,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,481.9772,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,493.9473,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0273,975.9245,0,0.0,0.0,flydsl_fallback -256,8192,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,582.6193,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,973.3819,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0274,1556.0012,0,0.0,0.0,flydsl_fallback -256,16384,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,857.4551,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,1922.9129,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0275,2780.368,0,0.0,0.0,flydsl_fallback -256,32768,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,1498.8449,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,3782.1474,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0274,5280.9923,0,0.0,0.0,flydsl_fallback -256,1,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,25.8558,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,7.5925,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0294,33.4483,0,0.0,0.0,flydsl_fallback -256,2,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,27.7627,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,13.1788,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0292,40.9415,0,0.0,0.0,flydsl_fallback -256,4,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,32.6406,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,19.9788,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0302,52.6194,0,0.0,0.0,flydsl_fallback -256,8,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,53.4236,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,29.9109,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0288,83.3345,0,0.0,0.0,flydsl_fallback -256,16,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,88.1749,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,47.8359,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0295,136.0108,0,0.0,0.0,flydsl_fallback -256,32,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,151.64630000000002,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,73.5877,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0298,225.234,0,0.0,0.0,flydsl_fallback -256,64,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,210.0676,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,105.734,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0295,315.8016,0,0.0,0.0,flydsl_fallback -256,128,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,249.557,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,127.0834,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0298,376.6404,0,0.0,0.0,flydsl_fallback -256,256,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,258.6365,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,138.3303,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0294,396.9668,0,0.0,0.0,flydsl_fallback -256,512,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,276.2796,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,152.9675,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0296,429.2471,0,0.0,0.0,flydsl_fallback -256,1024,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,279.30060000000003,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,193.528,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0291,472.8286,0,0.0,0.0,flydsl_fallback -256,2048,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,300.9174,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,334.4923,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0291,635.4097,0,0.0,0.0,flydsl_fallback -256,4096,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,515.4662,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,553.9443,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0291,1069.4105,0,0.0,0.0,flydsl_fallback -256,8192,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,609.8722,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,1091.6879,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0291,1701.5601,0,0.0,0.0,flydsl_fallback -256,16384,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,931.2307,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,2118.819,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0291,3050.0497,0,0.0,0.0,flydsl_fallback -256,32768,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,1656.0413,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,4234.4266,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0291,5890.4679,0,0.0,0.0,flydsl_fallback -256,1,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,24.4312,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,7.9113,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0138,32.3425,0,0.0,0.0,flydsl_fallback -256,2,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,25.2559,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,8.5243,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0121,33.7802,0,0.0,0.0,flydsl_fallback -256,4,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,28.5367,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,14.3445,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0108,42.8812,0,0.0,0.0,flydsl_fallback -256,8,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,31.2631,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,19.6856,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0114,50.9487,0,0.0,0.0,flydsl_fallback -256,16,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,47.7488,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,30.0117,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0113,77.7605,0,0.0,0.0,flydsl_fallback -256,32,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,75.0572,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,42.507,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0126,117.5642,0,0.0,0.0,flydsl_fallback -256,64,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,109.6539,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,61.03,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0133,170.6839,0,0.0,0.0,flydsl_fallback -256,128,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,127.9358,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,70.973,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.013,198.9088,0,0.0,0.0,flydsl_fallback -256,256,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,134.8579,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,81.4717,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0134,216.3296,0,0.0,0.0,flydsl_fallback -256,512,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,143.2304,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,91.2423,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0132,234.4727,0,0.0,0.0,flydsl_fallback -256,1024,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,155.5109,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,127.797,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0135,283.3079,0,0.0,0.0,flydsl_fallback -256,2048,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,167.2837,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,233.5581,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0131,400.8418,0,0.0,0.0,flydsl_fallback -256,4096,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,189.3904,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,439.3833,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0132,628.7737,0,0.0,0.0,flydsl_fallback -256,8192,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,314.9939,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,845.8057,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0133,1160.7996,0,0.0,0.0,flydsl_fallback -256,16384,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,568.1025,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,1664.4928,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0133,2232.5953,0,0.0,0.0,flydsl_fallback -256,32768,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,812.3858,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,3354.1041,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0132,4166.4899,0,0.0,0.0,flydsl_fallback +256,1,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,13.2452,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb7_go_fp4,16.9%,6.6844,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,1.6%,19.9296,0,4.42,106070.91, +256,2,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,15.8944,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb4_go_fp4,17.2%,8.4755,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,1.3%,24.3699,0,7.23,86745.22, +256,4,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,21.3039,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb4_fp4,16.1%,12.4074,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic,1.3%,33.7113,0,10.45,62709.4, +256,8,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,29.8852,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w4,0.0%,17.8471,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.2%,47.7323,0,14.76,44290.79, +256,16,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,44.7944,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4,0.0%,27.4087,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.2%,72.2031,0,19.52,29282.31, +256,32,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,81.51140000000001,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3,0.0%,44.797,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.2%,126.3084,0,22.32,16741.7, +256,64,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,118.8141,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2,0.0%,67.0329,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,1.1%,185.847,0,30.33,11381.97, +256,128,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,118.5136,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w4,0.0%,69.4259,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.2%,187.9395,0,59.99,11262.57, +256,256,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,115.8638,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w4_fp4,17.6%,74.2967,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,1.2%,190.1605,0,118.58,11145.5, +256,512,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,117.5612,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_fp4,17.0%,80.6277,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.2%,198.1889,0,227.55,10721.79, +256,1024,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,119.968,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.3%,102.033,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2_sbm64,1.1%,222.001,0,406.28,9621.35, +256,2048,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,132.5067,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w4,0.0%,157.7576,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_bnt2,0.0%,290.2643,0,621.46,7434.5, +256,4096,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,156.74679999999998,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w2,0.0%,267.2572,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.0%,424.004,0,850.88,5193.37, +256,8192,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,250.0777,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w3_bnt0_xcd4,0.0%,463.8763,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_sbm128,0.0%,713.954,0,1010.65,3207.62, +256,16384,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,366.0242,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_bnt0_xcd4_fp4,17.3%,926.6298,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce,0.0%,1292.654,0,1116.39,1907.9, +256,32768,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,631.8818,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_bnt0_xcd4_fp4,17.3%,1751.4067,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist,0.0%,2383.2885,0,1211.02,1182.64, +256,1,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,16.354,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb4_go_fp4,12.7%,7.2729,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,2.6%,23.6269,0,7.46,178943.49, +256,2,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,21.5812,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb4_fp4,15.4%,12.8273,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,3.0%,34.4085,0,10.24,122873.75, +256,4,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,28.0247,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3,0.0%,17.2081,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.9%,45.2328,0,15.58,93470.77, +256,8,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,44.514500000000005,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_xcd4,0.0%,27.0211,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.8%,71.5356,0,19.7,59103.87, +256,16,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,81.495,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w4_xcd4,0.0%,44.4102,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2_xcd4,2.7%,125.9052,0,22.39,33582.43, +256,32,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,154.3846,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3,0.0%,80.4195,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_xcd4_persist,2.7%,234.8041,0,24.01,18008.83, +256,64,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,228.1004,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.2%,118.3423,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,2.8%,346.4427,0,32.54,12207.6, +256,128,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,228.1496,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3,0.0%,121.3876,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,2.7%,349.5372,0,64.51,12103.46, +256,256,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,228.5579,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.5%,124.7,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,2.7%,353.2579,0,127.66,11983.78, +256,512,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,232.2845,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.3%,136.0064,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist_sbm64,2.7%,368.2909,0,244.9,11509.57, +256,1024,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,233.5376,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w4_fp4,17.3%,146.6693,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,2.7%,380.2069,0,474.45,11177.8, +256,2048,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,238.474,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.3%,206.1031,flydsl_moe2_afp4_wfp4_bf16_t64x128x256_atomic_bnt2_persist,2.7%,444.5771,0,811.51,9608.9, +256,4096,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,292.4502,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w4,0.0%,377.921,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4_persist_sbm128,0.3%,670.3712,0,1076.35,6438.13, +256,8192,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,459.2863,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w2_bnt0_fp4,17.3%,626.7223,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist,0.3%,1086.0086,0,1328.82,4055.23, +256,16384,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,691.0762,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0%,1171.9833,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4_sbm128,0.3%,1863.0595,0,1549.18,2458.42, +256,32768,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,1208.457,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w2_bnt0_fp4,17.3%,2227.9499,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4,0.3%,3436.4069,0,1679.79,1435.37, +256,1,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,17.8501,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb7_fp4,16.7%,7.6621,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,2.4%,25.5122,0,7.77,166151.49, +256,2,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,22.8551,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb7_fp4,18.9%,13.1521,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.7%,36.0072,0,11.01,117724.0, +256,4,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,29.4262,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3,0.0%,19.3881,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.8%,48.8143,0,16.24,86838.38, +256,8,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,50.5137,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4_fp4,18.7%,29.363,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.7%,79.8767,0,19.85,53069.8, +256,16,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,95.7203,flydsl_moe1_afp4_wfp4_bf16_t32x32x256_w3,0.0%,49.2937,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,2.8%,145.014,0,21.87,29233.13, +256,32,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,171.3749,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_xcd4,0.0%,89.5678,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,2.8%,260.9427,0,24.3,16247.08, +256,64,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,236.2525,flydsl_moe1_afp4_wfp4_bf16_t64x64x256_w4_xcd4,0.0%,120.7332,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_sbm64,2.8%,356.9857,0,35.53,11877.91, +256,128,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,234.1601,flydsl_moe1_afp4_wfp4_bf16_t64x64x256_w4_xcd4_fp4,17.1%,122.0701,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_sbm64,2.9%,356.2302,0,71.21,11906.97, +256,256,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,236.6952,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w2_xcd4_fp4,17.2%,127.4914,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist_sbm64,2.9%,364.1866,0,139.31,11654.39, +256,512,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,242.1832,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.2%,134.7446,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,2.8%,376.9278,0,269.2,11275.05, +256,1024,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,246.3426,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.3%,153.0524,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,2.9%,399.395,0,508.11,10668.36, +256,2048,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,251.0842,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.2%,233.9762,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_atomic_bnt2_persist,2.8%,485.0604,0,836.75,8829.64, +256,4096,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,316.9738,flydsl_moe1_afp4_wfp4_bf16_t128x128x256,0.0%,400.1827,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.2%,717.1565,0,1131.9,6033.48, +256,8192,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,473.8717,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_bnt0_fp4,17.2%,670.9802,flydsl_moe2_afp4_wfp4_bf16_t64x128x256_reduce_persist,0.2%,1144.8519,0,1418.09,3856.42, +256,16384,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,731.1314,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_bnt0_fp4,17.3%,1313.4823,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4_persist,0.2%,2044.6137,0,1588.07,2245.5, +256,32768,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,1306.3543,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w2_bnt0_fp4,17.3%,2514.8871,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_xcd4_persist,0.2%,3821.2414,0,1699.45,1293.69, +256,1,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,13.3809,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb14_go_fp4,20.1%,6.8161,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,1.2%,20.197,0,4.91,104939.14, +256,2,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,16.4835,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb4_go_fp4,17.6%,8.4904,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,1.0%,24.9739,0,7.94,84867.69, +256,4,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,22.3365,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_kb2_go_fp4,17.3%,13.1164,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic,1.2%,35.4529,0,11.18,59784.12, +256,8,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,30.8842,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w4,0.0%,19.279,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic,1.3%,50.1632,0,15.8,42254.21, +256,16,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,54.5127,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3,0.0%,29.3089,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.3%,83.8216,0,18.91,25289.17, +256,32,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,93.3433,flydsl_moe1_afp4_wfp4_bf16_t32x32x256_w3,0.0%,53.0394,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.3%,146.3827,0,21.66,14483.42, +256,64,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,129.68970000000002,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4_xcd4,0.0%,68.2803,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,1.2%,197.97,0,32.03,10712.79, +256,128,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,126.7838,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4_xcd4_fp4,16.6%,70.9723,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_persist,1.2%,197.7561,0,64.14,10731.33, +256,256,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,127.3984,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_xcd4_fp4,17.0%,73.9814,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.2%,201.3798,0,125.97,10551.9, +256,512,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,130.7355,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.1%,82.6416,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2,1.2%,213.3771,0,237.77,9984.41, +256,1024,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,133.9164,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.4%,115.6845,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,1.2%,249.6009,0,406.52,8579.51, +256,2048,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,139.5789,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.2%,169.6701,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_bnt2_persist,0.0%,309.249,0,656.23,6995.9, +256,4096,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,177.8189,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_w2,0.0%,282.0472,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.0%,459.8661,0,882.59,4800.34, +256,8192,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,284.6439,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_xcd4,0.0%,486.7326,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.0%,771.3765,0,1052.34,2975.97, +256,16384,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,440.0206,flydsl_moe1_afp4_wfp4_bf16_t128x128x256_bnt0_xcd4,0.0%,979.1867,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce_persist_sbm128,0.0%,1419.2073,0,1143.95,1741.65, +256,32768,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,700.9508,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w4_bnt0_xcd4_fp4,17.3%,1957.461,flydsl_moe2_afp4_wfp4_bf16_t64x256x256_reduce,0.0%,2658.4118,0,1221.4,1062.32, +256,1,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,23.3826,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w3_kb7_fp4,19.6%,10.5981,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,3.4%,33.9807,0,11.66,249487.46, +256,2,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,31.5916,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2,0.0%,18.0772,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,4.6%,49.6688,0,15.96,170686.22, +256,4,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,53.5345,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w3_xcd4,0.0%,29.9048,flydsl_moe2_afp4_wfp4_bf16_t32x256x256_atomic_bnt2_xcd4,4.8%,83.4393,0,19.0,101604.68, +256,8,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,92.3327,flydsl_moe1_afp4_wfp4_bf16_t32x32x256_w3,0.0%,48.1776,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,4.7%,140.5103,0,22.57,60336.57, +256,16,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,148.1592,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,16.6%,80.207,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,5.1%,228.3662,0,27.77,37124.94, +256,32,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,242.291,flydsl_moe1_afp4_wfp4_bf16_t64x64x256_w4_fp4,16.8%,124.0,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2_persist_sbm64,5.1%,366.291,0,34.63,23146.69, +256,64,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,356.1691,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4,0.0%,180.7609,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2_persist,5.1%,536.93,0,47.24,15791.84, +256,128,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,439.8549,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w2,0.0%,227.2459,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2_persist,5.1%,667.1008,0,76.05,12712.46, +256,256,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,467.7362,flydsl_moe1_afp4_wfp4_bf16_t32x64x256_w4,0.0%,238.4351,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,5.1%,706.1713,0,143.69,12013.01, +256,512,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,469.2899,flydsl_moe1_afp4_wfp4_bf16_t64x64x256_w4_fp4,17.3%,251.0086,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2_sbm64,5.1%,720.2985,0,281.74,11785.04, +256,1024,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,477.9451,flydsl_moe1_afp4_wfp4_bf16_t32x128x256_w2_fp4,17.3%,276.6206,flydsl_moe2_afp4_wfp4_bf16_t32x128x256_atomic_bnt2,5.1%,754.5657,0,537.89,11264.44, +256,2048,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,504.3514,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w3_fp4,17.3%,315.2989,flydsl_moe2_afp4_wfp4_bf16_t64x128x256_atomic_bnt2_persist,5.1%,819.6503,0,990.36,10396.85, +256,4096,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,635.6289,flydsl_moe1_afp4_wfp4_bf16_t128x128x256,0.0%,532.6313,flydsl_moe2_afp4_wfp4_bf16_t128x128x256_atomic_bnt2,5.1%,1168.2602,0,1389.67,7332.11, +256,8192,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,911.92,flydsl_moe1_afp4_wfp4_bf16_t64x128x256_w4_bnt0_fp4,17.3%,1015.7784,flydsl_moe2_afp4_wfp4_bf16_t64x128x256_atomic,5.1%,1927.6984,0,1684.39,4489.24, +256,16384,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,1505.5926,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0%,1794.5213,flydsl_moe2_afp4_wfp4_bf16_t128x128x256_reduce,0.9%,3300.1139,0,1967.81,2675.68, +256,32768,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,2674.9513,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0%,3435.7292,flydsl_moe2_afp4_wfp4_bf16_t128x128x256_reduce,1.0%,6110.6805,0,2125.46,1502.68, +256,1,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,27.8284,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,10.5981,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0336,38.4265,0,0.0,0.0,flydsl_fallback +256,2,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,33.008,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,18.0772,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0463,51.0852,0,0.0,0.0,flydsl_fallback +256,4,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,55.4218,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,32.2415,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0469,87.6633,0,0.0,0.0,flydsl_fallback +256,8,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,100.8819,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,51.8052,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.048,152.6871,0,0.0,0.0,flydsl_fallback +256,16,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,167.9725,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,86.7403,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0505,254.7128,0,0.0,0.0,flydsl_fallback +256,32,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,266.6936,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,131.7576,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0507,398.4512,0,0.0,0.0,flydsl_fallback +256,64,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,395.8115,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,198.7102,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0511,594.5217,0,0.0,0.0,flydsl_fallback +256,128,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,488.1649,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,253.748,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0514,741.9129,0,0.0,0.0,flydsl_fallback +256,256,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,512.2106,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,273.9965,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0511,786.2071,0,0.0,0.0,flydsl_fallback +256,512,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,524.6804,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,286.3015,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0507,810.9819,0,0.0,0.0,flydsl_fallback +256,1024,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,534.8282,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,317.4022,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.051,852.2304,0,0.0,0.0,flydsl_fallback +256,2048,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,568.4449,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,466.1059,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0505,1034.5508,0,0.0,0.0,flydsl_fallback +256,4096,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,670.686,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,883.5967,moe_ck2stages_gemm2_256x128x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0507,1554.2827,0,0.0,0.0,flydsl_fallback +256,8192,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,945.22,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,1727.4364,moe_ck2stages_gemm2_256x128x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0507,2672.6564,0,0.0,0.0,flydsl_fallback +256,16384,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,1505.5926,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,3428.4607,moe_ck2stages_gemm2_256x128x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0507,4934.0533,0,0.0,0.0,flydsl_fallback +256,32768,7168,1024,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,2674.9513,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,6825.9125,moe_ck2stages_gemm2_256x128x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0507,9500.8638,0,0.0,0.0,flydsl_fallback +256,1,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,24.6248,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,6.6844,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0156,31.3092,0,0.0,0.0,flydsl_fallback +256,2,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,24.6277,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,8.4755,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0133,33.1032,0,0.0,0.0,flydsl_fallback +256,4,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,26.1829,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,14.3766,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0138,40.5595,0,0.0,0.0,flydsl_fallback +256,8,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,30.2074,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,18.9994,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0122,49.2068,0,0.0,0.0,flydsl_fallback +256,16,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,50.1851,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,28.8427,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0119,79.0278,0,0.0,0.0,flydsl_fallback +256,32,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,91.9274,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,56.4536,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0119,148.381,0,0.0,0.0,flydsl_fallback +256,64,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,133.52890000000002,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,72.2503,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0114,205.7792,0,0.0,0.0,flydsl_fallback +256,128,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,134.37,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,75.4038,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0113,209.7738,0,0.0,0.0,flydsl_fallback +256,256,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,135.7072,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,80.5166,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0117,216.2238,0,0.0,0.0,flydsl_fallback +256,512,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,136.8202,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,88.9883,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0117,225.8085,0,0.0,0.0,flydsl_fallback +256,1024,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,141.0647,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,117.1939,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0117,258.2586,0,0.0,0.0,flydsl_fallback +256,2048,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,150.5248,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,214.4916,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0117,365.0164,0,0.0,0.0,flydsl_fallback +256,4096,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,176.7558,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,403.0595,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0117,579.8153,0,0.0,0.0,flydsl_fallback +256,8192,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,267.11560000000003,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,759.2218,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0117,1026.3374,0,0.0,0.0,flydsl_fallback +256,16384,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,437.7072,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,1455.2831,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0116,1892.9903,0,0.0,0.0,flydsl_fallback +256,32768,7168,256,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,663.3669,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,2875.3131,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0116,3538.68,0,0.0,0.0,flydsl_fallback +256,1,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,24.539,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,7.2729,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0259,31.8119,0,0.0,0.0,flydsl_fallback +256,2,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,26.6509,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,14.035,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0296,40.6859,0,0.0,0.0,flydsl_fallback +256,4,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,31.2822,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,19.072,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0281,50.3542,0,0.0,0.0,flydsl_fallback +256,8,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,49.7396,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,29.5203,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0279,79.2599,0,0.0,0.0,flydsl_fallback +256,16,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,92.3538,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,48.1233,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0279,140.4771,0,0.0,0.0,flydsl_fallback +256,32,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,174.5219,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,85.8354,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0276,260.3573,0,0.0,0.0,flydsl_fallback +256,64,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,257.9019,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,125.3541,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0273,383.256,0,0.0,0.0,flydsl_fallback +256,128,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,256.3908,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,127.7442,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0273,384.135,0,0.0,0.0,flydsl_fallback +256,256,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,260.6848,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,133.543,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0273,394.2278,0,0.0,0.0,flydsl_fallback +256,512,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,263.3216,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,149.5248,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0273,412.8464,0,0.0,0.0,flydsl_fallback +256,1024,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,273.1676,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,224.9774,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0274,498.145,0,0.0,0.0,flydsl_fallback +256,2048,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,289.69550000000004,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,339.3983,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0272,629.0938,0,0.0,0.0,flydsl_fallback +256,4096,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,452.9873,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,569.9896,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0273,1022.9769,0,0.0,0.0,flydsl_fallback +256,8192,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,516.1552,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,938.8917,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0273,1455.0469,0,0.0,0.0,flydsl_fallback +256,16384,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,839.7145,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,1859.6769,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0273,2699.3914,0,0.0,0.0,flydsl_fallback +256,32768,7168,512,384,8,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,1469.5306,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,3517.7145,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0273,4987.2451,0,0.0,0.0,flydsl_fallback +256,1,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,25.0451,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,7.6621,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0241,32.7072,0,0.0,0.0,flydsl_fallback +256,2,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,27.1551,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,14.1879,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0284,41.343,0,0.0,0.0,flydsl_fallback +256,4,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,32.0801,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,19.8641,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0281,51.9442,0,0.0,0.0,flydsl_fallback +256,8,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,55.6951,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,31.6528,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0265,87.3479,0,0.0,0.0,flydsl_fallback +256,16,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,105.1921,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,53.563,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.028,158.7551,0,0.0,0.0,flydsl_fallback +256,32,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,199.5216,moe_ck2stages_gemm1_256x32x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,97.0299,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0288,296.5515,0,0.0,0.0,flydsl_fallback +256,64,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,260.8307,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,125.6864,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0284,386.5171,0,0.0,0.0,flydsl_fallback +256,128,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,266.3821,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,128.3034,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0285,394.6855,0,0.0,0.0,flydsl_fallback +256,256,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,265.6527,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,135.5647,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0286,401.2174,0,0.0,0.0,flydsl_fallback +256,512,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,269.6034,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,162.5341,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0283,432.1375,0,0.0,0.0,flydsl_fallback +256,1024,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,280.10200000000003,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,275.7737,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0284,555.8757,0,0.0,0.0,flydsl_fallback +256,2048,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,311.42830000000004,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,349.9696,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0284,661.3979,0,0.0,0.0,flydsl_fallback +256,4096,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,460.8264,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,617.8159,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0284,1078.6423,0,0.0,0.0,flydsl_fallback +256,8192,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,539.202,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,995.9833,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0284,1535.1853,0,0.0,0.0,flydsl_fallback +256,16384,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,876.0268,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,1986.4763,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0285,2862.5031,0,0.0,0.0,flydsl_fallback +256,32768,7168,512,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,1611.5569,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,3894.0732,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0285,5505.6301,0,0.0,0.0,flydsl_fallback +256,1,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,23.9613,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,6.8161,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0124,30.7774,0,0.0,0.0,flydsl_fallback +256,2,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,24.9598,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,8.4904,moe_ck2stages_gemm2_256x32x128x128_1x4_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0102,33.4502,0,0.0,0.0,flydsl_fallback +256,4,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,28.0817,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,14.2802,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0133,42.3619,0,0.0,0.0,flydsl_fallback +256,8,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,31.893,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,19.8686,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0125,51.7616,0,0.0,0.0,flydsl_fallback +256,16,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,55.979400000000005,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,31.3644,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0123,87.3438,0,0.0,0.0,flydsl_fallback +256,32,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,105.7492,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,54.6225,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.013,160.3717,0,0.0,0.0,flydsl_fallback +256,64,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,138.129,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,70.2826,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0122,208.4116,0,0.0,0.0,flydsl_fallback +256,128,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,139.39800000000002,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,76.9762,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.012,216.3742,0,0.0,0.0,flydsl_fallback +256,256,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,140.2029,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,78.7722,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0124,218.9751,0,0.0,0.0,flydsl_fallback +256,512,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,142.1878,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,92.4496,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0123,234.6374,0,0.0,0.0,flydsl_fallback +256,1024,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,32,0,148.3272,moe_ck2stages_gemm1_64x32x32x128_1x1_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,133.2883,moe_ck2stages_gemm2_64x32x32x128_1x1_MulABScaleExpertWeightShuffled_v1_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0122,281.6155,0,0.0,0.0,flydsl_fallback +256,2048,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,64,0,163.71689999999998,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,237.3456,moe_ck2stages_gemm2_64x64x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0123,401.0625,0,0.0,0.0,flydsl_fallback +256,4096,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,194.9509,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,451.9496,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0123,646.9005,0,0.0,0.0,flydsl_fallback +256,8192,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,288.8715,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,857.3649,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0123,1146.2364,0,0.0,0.0,flydsl_fallback +256,16384,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,463.6276,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,1723.7202,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0123,2187.3478,0,0.0,0.0,flydsl_fallback +256,32768,7168,256,385,9,ActivationType.Silu,torch.bfloat16,torch.float4_e2m1fn_x2,torch.float4_e2m1fn_x2,QuantType.per_1x32,1,0,128,0,702.7786,moe_ck2stages_gemm1_256x128x128x128_1x4_MulABScaleShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight0_silu_FP4X2_FP4X2_B16,0.0,3373.9364,moe_ck2stages_gemm2_64x128x128x128_1x1_MulABScaleExpertWeightShuffled_v3_Nswizzle0_Quant3_MulRoutedWeight1_FP4X2_FP4X2_B16,0.0123,4076.715,0,0.0,0.0,flydsl_fallback diff --git a/aiter/configs/tuned_fmoe.csv b/aiter/configs/tuned_fmoe.csv index a0c9dd2a9f..2dfbb76fc3 100644 --- a/aiter/configs/tuned_fmoe.csv +++ b/aiter/configs/tuned_fmoe.csv @@ -1,1700 +1,948 @@ cu_num,token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,q_type,use_g1u1,doweight_stage1,block_m,ksplit,us1,kernelName1,err1,us2,kernelName2,err2,us,run_1stage,tflops,bw,_tag -80,512,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,64,0,373.4158,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_64x128_2tg_pf3E,0.0%,268.4886,moe_ck2stages_gemm2_256x64x128x256_1x4_MulABScaleExpertWeight_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.3%,641.9044,0,240.88,955.62, -80,512,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.int8,torch.int8,QuantType.per_Tensor,1,0,64,0,386.1143,_ZN5aiter49fmoe_stage1_bf16_pertokenInt8_g1u1_64x128_2tg_pf3E,0.0%,250.0186,moe_ck2stages_gemm2_256x64x128x256_1x4_MulABScaleExpertWeight_v3_Nswizzle0_Quant1_MulRoutedWeight1_I8_I8_B16,2.1%,636.1329000000001,0,243.06,964.29, -80,4,2304,1536,8,2,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,17.6606,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,15.126,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.3%,32.7866,0,5.18,2591.37, +80,1,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,14.7181,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,8.8578,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,23.5759,0,1.6,12809.78, +80,2,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,16.1962,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,10.9481,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,27.1443,0,2.78,11126.26, 80,4,2304,1536,8,2,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,17.8008,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,14.5115,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,32.3123,0,5.26,2629.41, -80,512,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,774.6328,moe_ck2stages_gemm1_256x64x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,459.0113,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCastExpertWeight_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.3%,1233.6441,0,125.34,989.38, -256,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,130.4639,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,70.3202,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,200.7841,0,7.02,14040.11, -256,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,130.4639,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,70.3202,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,200.7841,0,7.02,14040.11, -256,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,130.4639,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,70.3202,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,200.7841,0,7.02,14040.11, -256,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,130.4639,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,70.3202,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,200.7841,0,7.02,14040.11, -256,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,130.4639,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,70.3202,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,200.7841,0,7.02,14040.11, -256,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,195.38,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,107.5659,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,302.9459,0,9.3,9306.91, -256,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,278.093,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,140.8376,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,418.9306,0,13.46,6732.4, -256,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,294.8532,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,160.973,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,455.8262,0,24.73,6191.49, -256,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,306.0006,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,170.2105,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,476.2111,0,47.35,5934.16, -256,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,309.2402,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,184.9719,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,494.2121,0,91.25,5732.87, -256,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.0568,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,231.4032,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,556.46,0,162.09,5117.95, -256,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.2525,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,72.0121,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.2646,0,7.04,14076.53, -256,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.2525,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,72.0121,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.2646,0,7.04,14076.53, -256,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.2525,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,72.0121,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.2646,0,7.04,14076.53, -256,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.2525,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,72.0121,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.2646,0,7.04,14076.53, -256,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.2525,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,72.0121,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.2646,0,7.04,14076.53, -256,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,195.9999,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,102.7882,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,298.7881,0,9.43,9436.42, -256,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,277.4499,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,139.0861,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,416.536,0,13.53,6771.1, -256,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,296.1855,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,156.7142,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,452.8997,0,24.89,6231.5, -256,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,306.2672,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,164.6962,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,470.9634,0,47.88,6000.28, -256,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,309.6434,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,178.4363,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,488.0797,0,92.4,5804.9, -256,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,325.7872,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,223.4421,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.2293,0,164.22,5185.32, -256,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.4265,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,41.189,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,108.6155,0,12.98,12978.17, -256,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.4265,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,41.189,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,108.6155,0,12.98,12978.17, -256,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.4265,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,41.189,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,108.6155,0,12.98,12978.17, -256,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.4265,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,41.189,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,108.6155,0,12.98,12978.17, -256,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.4265,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,41.189,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,108.6155,0,12.98,12978.17, -256,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,102.7345,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,56.8998,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,159.6343,0,17.66,8832.53, -256,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,140.8235,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,76.5494,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,217.3729,0,25.93,6489.6, -256,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,152.4946,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,85.8934,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,238.388,0,47.29,5923.28, -256,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,158.9481,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,92.9698,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,251.91790000000003,0,89.51,5616.08, -256,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,161.9427,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,114.4508,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,276.3935,0,163.16,5138.67, -256,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,168.3246,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,205.6813,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,374.0059,0,241.16,3826.96, -256,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,68.0621,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,40.8199,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.882,0,12.94,12946.4, -256,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,68.0621,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,40.8199,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.882,0,12.94,12946.4, -256,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,68.0621,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,40.8199,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.882,0,12.94,12946.4, -256,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,68.0621,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,40.8199,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.882,0,12.94,12946.4, -256,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,68.0621,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,40.8199,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.882,0,12.94,12946.4, -256,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,102.8318,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,57.3307,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,160.1625,0,17.6,8803.4, -256,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,141.6806,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,77.5578,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,219.2384,0,25.71,6434.38, -256,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,154.7826,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,86.5442,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,241.3268,0,46.72,5851.15, -256,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,159.3862,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,95.0034,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,254.3896,0,88.64,5561.51, -256,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,162.5288,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.9963,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,276.5251,0,163.09,5136.23, -256,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,168.5532,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,205.3887,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,373.9419,0,241.2,3827.62, -256,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.6613,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,46.3816,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.7%,115.0429,0,12.25,12253.08, -256,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.6613,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,46.3816,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.7%,115.0429,0,12.25,12253.08, -256,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.6613,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,46.3816,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.7%,115.0429,0,12.25,12253.08, -256,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.6613,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,46.3816,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.7%,115.0429,0,12.25,12253.08, -256,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.6613,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,46.3816,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.7%,115.0429,0,12.25,12253.08, -256,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,158.0965,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,158.0965,1,17.83,8918.44, -256,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,215.8536,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,215.8536,1,26.12,6535.27, -256,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,244.4529,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_ps_32x256E,0.0%,0.0,Null,0,244.4529,1,46.12,5776.32, -256,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,254.5557,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,254.5557,1,88.58,5557.88, -256,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,267.5654,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_ps_32x256E,0.0%,0.0,Null,0,267.5654,1,168.55,5308.22, -256,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,366.6991,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,366.6991,1,245.96,3903.22, -256,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.3263,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,44.1851,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,112.5114,0,12.53,12528.78, -256,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.3263,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,44.1851,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,112.5114,0,12.53,12528.78, -256,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.3263,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,44.1851,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,112.5114,0,12.53,12528.78, -256,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.3263,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,44.1851,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,112.5114,0,12.53,12528.78, -256,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.3263,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,44.1851,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,112.5114,0,12.53,12528.78, -256,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,100.365,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,61.0618,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,161.4268,0,17.46,8734.45, -256,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,140.407,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,78.9057,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,219.3127,0,25.7,6432.2, -256,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,153.1395,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,91.6374,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,244.7769,0,46.06,5768.68, -256,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,159.338,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,102.7582,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,262.0962,0,86.03,5397.98, -256,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,161.3644,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf2E,0.0%,132.3204,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,293.6848,0,153.56,4836.12, -256,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,163.9563,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf3E,0.0%,218.341,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,382.2973,0,235.93,3743.96, -256,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,268.7481,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,135.0723,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,403.82040000000006,0,6.98,13960.67, -256,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,268.7481,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,135.0723,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,403.82040000000006,0,6.98,13960.67, -256,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,268.7481,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,135.0723,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,403.82040000000006,0,6.98,13960.67, -256,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,268.7481,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,135.0723,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,403.82040000000006,0,6.98,13960.67, -256,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,268.7481,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,135.0723,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,403.82040000000006,0,6.98,13960.67, -256,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,378.5195,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,196.1646,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,574.6841,0,9.81,9810.72, -256,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,559.7713,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,271.7302,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,831.5015000000001,0,13.56,6781.68, -256,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,608.7673,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,313.0697,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,921.837,0,24.46,6119.1, -256,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,612.6749,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,322.9055,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,935.5804,0,48.2,6033.14, -256,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,623.7185,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,338.7751,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,962.4936,0,93.71,5872.06, -256,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.3028,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,368.4383,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1017.7411,0,177.24,5567.73, -256,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,265.8935,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,135.088,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,400.9815,0,7.03,14059.51, -256,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,265.8935,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,135.088,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,400.9815,0,7.03,14059.51, -256,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,265.8935,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,135.088,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,400.9815,0,7.03,14059.51, -256,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,265.8935,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,135.088,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,400.9815,0,7.03,14059.51, -256,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,265.8935,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,135.088,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,400.9815,0,7.03,14059.51, -256,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,376.5017,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,196.4837,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,572.9854,0,9.84,9839.8, -256,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,556.9744,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,271.6147,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,828.5890999999999,0,13.61,6805.52, -256,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,610.8427,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,308.854,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,919.6967,0,24.52,6133.34, -256,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,614.0275,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,317.5052,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,931.5327,0,48.41,6059.35, -256,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,624.6592,moe_ck2stages_gemm1_256x64x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,332.6196,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,957.2788,0,94.22,5904.05, -256,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,644.3248,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,363.2348,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1007.5596,0,179.04,5623.99, -256,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,139.2785,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,70.4958,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,209.7743,0,13.44,13437.85, -256,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,139.2785,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,70.4958,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,209.7743,0,13.44,13437.85, -256,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,139.2785,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,70.4958,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,209.7743,0,13.44,13437.85, -256,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,139.2785,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,70.4958,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,209.7743,0,13.44,13437.85, -256,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,139.2785,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,70.4958,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,209.7743,0,13.44,13437.85, -256,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,194.034,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,100.7957,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,294.8297,0,19.12,9562.34, -256,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,274.3536,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf2E,0.0%,140.3968,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,414.7504,0,27.18,6799.15, -256,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,312.5621,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,161.8549,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,474.417,0,47.53,5946.93, -256,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,314.2275,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,169.8802,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,484.1077,0,93.16,5833.57, -256,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,318.189,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf2E,0.0%,183.5436,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,501.7326000000001,0,179.77,5639.62, -256,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.7642,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_64x128_2tg_pf2E,0.0%,226.0569,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,554.8211,0,325.13,5119.83, -256,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.6795,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,70.6801,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.3596,0,13.46,13464.47, -256,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.6795,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,70.6801,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.3596,0,13.46,13464.47, -256,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.6795,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,70.6801,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.3596,0,13.46,13464.47, -256,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.6795,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,70.6801,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.3596,0,13.46,13464.47, -256,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.6795,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,70.6801,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.3596,0,13.46,13464.47, -256,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,193.8469,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,101.2026,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,295.0495,0,19.11,9555.21, -256,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,277.7873,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,141.146,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,418.9333,0,26.91,6731.26, -256,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,310.6676,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,164.6245,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,475.2921,0,47.44,5935.98, -256,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,314.5026,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,168.2646,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,482.7672,0,93.41,5849.77, -256,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,318.2151,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,183.4334,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,501.6485,0,179.8,5640.57, -256,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.5261,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,225.2001,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,553.7262,0,325.77,5129.96, -256,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,203.1825,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,203.1825,1,13.87,13873.81, -256,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,203.1825,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,203.1825,1,13.87,13873.81, -256,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,203.1825,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,203.1825,1,13.87,13873.81, -256,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,203.1825,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,203.1825,1,13.87,13873.81, -256,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,203.1825,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,203.1825,1,13.87,13873.81, -256,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,196.4497,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf2E,0.0%,105.2123,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,301.66200000000003,0,18.69,9345.76, -256,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,264.1173,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf2E,0.0%,144.1125,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,408.2298,0,27.62,6907.75, -256,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,306.5916,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf2E,0.0%,167.7523,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,474.3439,0,47.54,5947.85, -256,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,482.7665,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_ps_32x512E,0.0%,0.0,Null,0,482.7665,1,93.41,5849.78, -256,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,494.6598,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_ps_32x512E,0.0%,0.0,Null,0,494.6598,1,182.34,5720.26, -256,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.8711,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,248.6884,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,582.5595000000001,0,309.65,4876.06, -256,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,129.8926,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf2E,0.0%,73.8599,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,203.7525,0,13.83,13835.0, -256,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,129.8926,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf2E,0.0%,73.8599,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,203.7525,0,13.83,13835.0, -256,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,129.8926,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf2E,0.0%,73.8599,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,203.7525,0,13.83,13835.0, -256,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,129.8926,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf2E,0.0%,73.8599,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,203.7525,0,13.83,13835.0, -256,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,129.8926,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf2E,0.0%,73.8599,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,203.7525,0,13.83,13835.0, -256,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,196.3192,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf2E,0.0%,102.4978,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,298.817,0,18.86,9434.74, -256,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,264.1664,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,141.5633,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,405.7297,0,27.79,6950.31, -256,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,305.1523,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,165.1653,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,470.3176,0,47.94,5998.77, -256,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,305.4521,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,172.4236,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,477.8757,0,94.37,5909.65, -256,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,306.7972,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,190.6723,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,497.4695,0,181.31,5687.95, -256,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,333.2413,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,244.2778,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,577.5191,0,312.35,4918.61, -256,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.5023,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,51.4998,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,141.0021,0,8.57,8568.82, -256,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.5023,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,51.4998,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,141.0021,0,8.57,8568.82, -256,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.5023,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,51.4998,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,141.0021,0,8.57,8568.82, -256,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.5023,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,51.4998,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,141.0021,0,8.57,8568.82, -256,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.5023,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,51.4998,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,141.0021,0,8.57,8568.82, -256,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,127.8742,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,68.7529,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,196.6271,0,12.29,6146.07, -256,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,136.8058,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,75.6377,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,212.4435,0,22.74,5690.96, -256,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,138.3157,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,77.7254,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,216.0411,0,44.73,5601.05, -256,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.7161,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,80.4366,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,221.1527,0,87.39,5481.07, -256,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,144.981,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,104.8371,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,249.8181,0,154.73,4868.94, -256,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,171.882,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,152.5554,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,324.4374,0,238.29,3774.96, -256,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,89.9594,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,51.0022,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,140.9616,0,8.57,8571.28, -256,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,89.9594,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,51.0022,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,140.9616,0,8.57,8571.28, -256,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,89.9594,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,51.0022,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,140.9616,0,8.57,8571.28, -256,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,89.9594,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,51.0022,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,140.9616,0,8.57,8571.28, -256,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,89.9594,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,51.0022,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,140.9616,0,8.57,8571.28, -256,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,127.4464,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,69.0267,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,196.4731,0,12.3,6150.89, -256,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,136.676,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,75.4552,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,212.1312,0,22.78,5699.34, -256,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,139.2281,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,77.6845,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,216.9126,0,44.55,5578.55, -256,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.0976,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,80.1778,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,221.2754,0,87.35,5478.03, -256,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,131.8625,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,70.246,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.4%,202.1085,0,6.97,13948.11, -256,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,131.8625,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,70.246,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.4%,202.1085,0,6.97,13948.11, -256,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,131.8625,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,70.246,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.4%,202.1085,0,6.97,13948.11, -256,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,131.8625,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,70.246,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.4%,202.1085,0,6.97,13948.11, -256,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,131.8625,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,70.246,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.4%,202.1085,0,6.97,13948.11, -256,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,198.5347,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,102.7245,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.4%,301.2592,0,9.36,9359.02, -256,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,277.5506,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,141.6194,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,419.17,0,13.45,6728.55, -256,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,298.4138,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,160.1887,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,458.6025,0,24.58,6154.01, -256,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,307.9132,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,170.5755,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,478.4887,0,47.12,5905.91, -256,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,310.8521,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,187.6128,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,498.4649,0,90.47,5683.96, -256,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.8822,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,228.9235,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,554.8057,0,162.57,5133.21, -256,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.4088,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,71.9127,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.3215,0,7.04,14072.53, -256,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.4088,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,71.9127,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.3215,0,7.04,14072.53, -256,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.4088,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,71.9127,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.3215,0,7.04,14072.53, -256,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.4088,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,71.9127,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.3215,0,7.04,14072.53, -256,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,128.4088,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,71.9127,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,200.3215,0,7.04,14072.53, -256,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,198.743,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,102.3427,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,301.0857,0,9.36,9364.41, -256,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,278.5912,moe_ck2stages_gemm1_256x64x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,137.9968,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,416.588,0,13.53,6770.26, -256,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,298.2262,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,156.1018,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,454.328,0,24.82,6211.9, -256,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,307.5774,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,164.881,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,472.4584,0,47.73,5981.29, -256,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,311.7026,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,177.8505,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,489.5531,0,92.12,5787.43, -256,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,326.6863,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,223.1283,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.8146,0,164.04,5179.81, -256,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.999,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,40.5166,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.4%,108.5156,0,12.99,12990.12, -256,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.999,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,40.5166,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.4%,108.5156,0,12.99,12990.12, -256,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.999,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,40.5166,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.4%,108.5156,0,12.99,12990.12, -256,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.999,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,40.5166,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.4%,108.5156,0,12.99,12990.12, -256,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,67.999,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,40.5166,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.4%,108.5156,0,12.99,12990.12, -256,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,102.4854,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,56.7236,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.4%,159.209,0,17.7,8856.12, -256,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,140.7195,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,75.8797,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,216.5992,0,26.03,6512.78, -256,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,154.2075,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,85.2527,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,239.4602,0,47.08,5896.76, -256,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,158.7735,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,92.6941,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,251.4676,0,89.67,5626.14, -256,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,162.9055,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,114.6803,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,277.5858,0,162.46,5116.6, -256,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,171.438,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,205.2641,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,376.7021,0,239.43,3799.57, -256,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,67.7683,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,40.8282,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.5965,0,12.98,12980.44, -256,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,67.7683,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,40.8282,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.5965,0,12.98,12980.44, -256,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,67.7683,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,40.8282,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.5965,0,12.98,12980.44, -256,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,67.7683,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,40.8282,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.5965,0,12.98,12980.44, -256,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,67.7683,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,40.8282,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,108.5965,0,12.98,12980.44, -256,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,104.0822,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,57.7224,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,161.8046,0,17.42,8714.06, -256,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,142.2581,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,77.8633,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,220.1214,0,25.61,6408.57, -256,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,153.3471,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,86.6202,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,239.9673,0,46.98,5884.3, -256,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,158.256,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,94.9442,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,253.2002,0,89.05,5587.64, -256,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,162.1092,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,114.1086,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,276.2178,0,163.27,5141.94, -256,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,169.5988,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,205.5691,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,375.1679,0,240.41,3815.11, -256,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.2374,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,46.5664,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,114.8038,0,12.28,12278.6, -256,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.2374,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,46.5664,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,114.8038,0,12.28,12278.6, -256,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.2374,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,46.5664,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,114.8038,0,12.28,12278.6, -256,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.2374,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,46.5664,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,114.8038,0,12.28,12278.6, -256,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,68.2374,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,46.5664,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,114.8038,0,12.28,12278.6, -256,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,100.638,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,64.7122,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,165.3502,0,17.05,8527.2, -256,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,139.9452,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,84.8694,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,224.8146,0,25.07,6274.78, -256,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,246.4494,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x128E,0.0%,0.0,Null,0,246.4494,1,45.75,5729.53, -256,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,267.3292,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0,267.3292,1,84.35,5292.32, -256,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,272.5758,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0,272.5758,1,165.45,5210.65, -256,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,367.9317,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0,367.9317,1,245.14,3890.14, -256,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.7365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,44.3023,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.0388,0,12.47,12470.32, -256,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.7365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,44.3023,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.0388,0,12.47,12470.32, -256,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.7365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,44.3023,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.0388,0,12.47,12470.32, -256,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.7365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,44.3023,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.0388,0,12.47,12470.32, -256,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,68.7365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,44.3023,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.0388,0,12.47,12470.32, -256,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,268.2034,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,134.7329,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,402.9363,0,7.0,13991.3, -256,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,268.2034,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,134.7329,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,402.9363,0,7.0,13991.3, -256,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,268.2034,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,134.7329,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,402.9363,0,7.0,13991.3, -256,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,268.2034,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,134.7329,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,402.9363,0,7.0,13991.3, -256,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,268.2034,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,134.7329,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,402.9363,0,7.0,13991.3, -256,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,380.515,moe_ck2stages_gemm1_256x64x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,194.6522,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,575.1672,0,9.8,9802.47, -256,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,563.744,moe_ck2stages_gemm1_256x64x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,268.523,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,832.267,0,13.55,6775.45, -256,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,605.8382,moe_ck2stages_gemm1_256x64x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,308.8384,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,914.6766,0,24.65,6167.0, -256,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,613.3676,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,316.2011,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,929.5687,0,48.51,6072.15, -256,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,621.3253,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,334.5202,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,955.8455,0,94.36,5912.91, -256,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,647.0892,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,363.288,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1010.3772,0,178.54,5608.31, -256,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,138.6611,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,69.9939,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,208.655,0,13.51,13509.94, -256,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,138.6611,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,69.9939,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,208.655,0,13.51,13509.94, -256,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,138.6611,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,69.9939,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,208.655,0,13.51,13509.94, -256,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,138.6611,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,69.9939,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,208.655,0,13.51,13509.94, -256,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,138.6611,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,69.9939,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,208.655,0,13.51,13509.94, -256,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,194.5186,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,101.0683,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,295.5869,0,19.07,9537.84, -256,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,278.8859,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf2E,0.0%,141.0514,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,419.0,0,26.85,6715.17, -256,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,310.9421,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,164.4566,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,475.3987,0,47.43,5934.65, -256,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,313.5176,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,168.5062,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,482.0238000000001,0,93.56,5858.79, -256,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,32,0,316.7569,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf2E,0.0%,183.8783,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,500.6352,0,180.16,5651.98, -256,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.9474,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_64x256_pf2E,0.0%,225.6663,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,554.6137,0,325.25,5121.75, -256,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.8701,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,70.2409,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.111,0,13.48,13480.48, -256,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.8701,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,70.2409,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.111,0,13.48,13480.48, -256,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.8701,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,70.2409,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.111,0,13.48,13480.48, -256,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.8701,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,70.2409,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.111,0,13.48,13480.48, -256,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,138.8701,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,70.2409,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,209.111,0,13.48,13480.48, -256,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,195.414,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,101.3303,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,296.74429999999995,0,19.0,9500.64, -256,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,275.5829,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,141.8053,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,417.3882,0,27.01,6756.18, -256,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,311.4401,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,164.7923,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,476.2324,0,47.35,5924.26, -256,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,314.331,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,169.9851,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,484.3161,0,93.12,5831.06, -256,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,32,0,315.989,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,183.8335,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,499.8225,0,180.45,5661.17, -256,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.6203,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x256_pf2E,0.0%,227.39,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,556.0102999999999,0,324.43,5108.88, -256,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,130.3234,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,74.9639,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,205.2873,0,13.73,13731.57, -256,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,130.3234,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,74.9639,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,205.2873,0,13.73,13731.57, -256,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,130.3234,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,74.9639,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,205.2873,0,13.73,13731.57, -256,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,130.3234,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,74.9639,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,205.2873,0,13.73,13731.57, -256,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,130.3234,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,74.9639,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,205.2873,0,13.73,13731.57, -256,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,298.4201,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x512E,0.0%,0.0,Null,0,298.4201,1,18.89,9447.29, -256,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,266.0248,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf2E,0.0%,146.7599,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,412.7847,0,27.31,6831.52, -256,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,306.4239,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf2E,0.0%,166.6554,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,473.0793,0,47.66,5963.75, -256,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,304.5903,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf2E,0.0%,178.5369,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,483.1272,0,93.34,5845.41, -256,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,498.7106,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x512E,0.0%,0.0,Null,0,498.7106,1,180.86,5673.8, -256,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.1281,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,249.1231,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,582.2512,0,309.81,4878.64, -256,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,130.2529,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,74.4948,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,204.7477,0,13.77,13767.76, -256,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,130.2529,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,74.4948,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,204.7477,0,13.77,13767.76, -256,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,130.2529,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,74.4948,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,204.7477,0,13.77,13767.76, -256,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,130.2529,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,74.4948,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,204.7477,0,13.77,13767.76, -256,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,130.2529,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,74.4948,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,204.7477,0,13.77,13767.76, -256,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,195.8963,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,103.7824,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,299.6787,0,18.81,9407.61, -256,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,265.4483,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,142.3836,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,407.8319,0,27.64,6914.49, -256,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,306.0949,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,166.151,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,472.2459,0,47.75,5974.27, -256,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,304.4449,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,177.3471,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,481.792,0,93.6,5861.61, -256,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,306.5738,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf2E,0.0%,190.3011,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,496.8749,0,181.52,5694.76, -256,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,332.9031,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,245.6419,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,578.545,0,311.8,4909.89, -256,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.9773,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,50.9117,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,140.889,0,8.57,8575.7, -256,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.9773,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,50.9117,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,140.889,0,8.57,8575.7, -256,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.9773,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,50.9117,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,140.889,0,8.57,8575.7, -256,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.9773,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,50.9117,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,140.889,0,8.57,8575.7, -256,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,89.9773,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,50.9117,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,140.889,0,8.57,8575.7, -256,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,127.6678,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,69.4848,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,197.1526,0,12.25,6129.69, -256,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,136.4991,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,75.3558,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,211.8549,0,22.81,5706.77, -256,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,139.0821,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,77.5844,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,216.6665,0,44.6,5584.88, -256,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,141.1941,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,80.8519,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,222.046,0,87.04,5459.02, -256,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,144.5293,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,104.6954,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,249.2247,0,155.1,4880.53, -256,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,169.6864,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,153.1648,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,322.8512,0,239.46,3793.5, -256,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,90.339,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,50.8203,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,141.1593,0,8.56,8559.28, -256,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,90.339,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,50.8203,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,141.1593,0,8.56,8559.28, -256,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,90.339,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,50.8203,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,141.1593,0,8.56,8559.28, -256,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,90.339,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,50.8203,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,141.1593,0,8.56,8559.28, -256,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,90.339,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,50.8203,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,141.1593,0,8.56,8559.28, -256,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,127.4505,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,69.9278,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,197.3783,0,12.24,6122.68, -256,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,136.5934,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,76.0763,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,212.6697,0,22.72,5684.91, -256,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,138.5011,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,78.0269,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,216.528,0,44.63,5588.45, -256,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,140.488,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,80.8556,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,221.3436,0,87.32,5476.34, -256,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,144.855,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,97.8569,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,242.7119,0,159.26,5011.49, -256,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.8959,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,144.4775,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.3734,0,244.36,3871.17, -256,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.9507,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,42.2681,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,93.2188,0,12.96,6481.27, -256,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.9507,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,42.2681,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,93.2188,0,12.96,6481.27, -256,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.9507,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,42.2681,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,93.2188,0,12.96,6481.27, -256,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.9507,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,42.2681,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,93.2188,0,12.96,6481.27, -256,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.9507,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,42.2681,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,93.2188,0,12.96,6481.27, -256,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,79.6392,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,54.2772,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,133.9164,0,18.04,4513.06, -256,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,83.5661,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,60.2323,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,143.79840000000002,0,33.6,4205.65, -256,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,84.7796,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,61.864,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,146.6436,0,65.9,4129.42, -256,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,85.0781,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,66.2997,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,151.3778,0,127.68,4010.66, -256,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,87.3865,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,82.0796,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,169.46609999999998,0,228.1,3601.14, -256,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.393,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,125.505,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,217.898,0,354.8,2829.59, -256,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.387,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,41.0638,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.4508,0,13.07,6535.11, -256,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.387,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,41.0638,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.4508,0,13.07,6535.11, -256,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.387,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,41.0638,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.4508,0,13.07,6535.11, -256,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.387,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,41.0638,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.4508,0,13.07,6535.11, -256,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.387,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,41.0638,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.4508,0,13.07,6535.11, -256,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,81.0339,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,51.9652,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,132.9991,0,18.16,4544.19, -256,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,84.9852,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,57.2253,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,142.2105,0,33.98,4252.61, -256,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,86.5581,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,59.0823,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,145.6404,0,66.35,4157.86, -256,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,86.6075,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.6415,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,151.249,0,127.78,4014.08, -256,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,91.2218,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,82.8771,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,174.0989,0,222.03,3505.31, -256,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.6859,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,125.2007,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.8866,0,353.19,2816.81, -256,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.0789,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0,76.0789,1,15.88,7941.44, -256,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.0789,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0,76.0789,1,15.88,7941.44, -256,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.0789,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0,76.0789,1,15.88,7941.44, -256,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.0789,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0,76.0789,1,15.88,7941.44, -256,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.0789,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0,76.0789,1,15.88,7941.44, -256,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,105.3523,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0,105.3523,1,22.93,5736.69, -256,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,115.4816,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0,115.4816,1,41.84,5236.91, -256,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,118.3827,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0,118.3827,1,81.63,5115.21, -256,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,120.3527,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0,120.3527,1,160.59,5044.55, -256,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,149.2682,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x384E,0.0%,0.0,Null,0,149.2682,1,258.96,4088.42, -256,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,91.8397,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.1852,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,251.0249,0,307.98,2456.18, -256,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.7186,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.1778,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.8964,0,10.61,5304.61, -256,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.7186,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.1778,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.8964,0,10.61,5304.61, -256,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.7186,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.1778,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.8964,0,10.61,5304.61, -256,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.7186,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.1778,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.8964,0,10.61,5304.61, -256,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.7186,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.1778,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.8964,0,10.61,5304.61, -256,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,80.6595,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x64_pf3E,0.0%,84.4366,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,165.09609999999998,0,14.63,3660.73, -256,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,83.8169,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x64_pf3E,0.0%,88.246,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,172.0629,0,28.08,3514.8, -256,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,86.7298,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,90.677,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,177.4068,0,54.47,3413.36, -256,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,86.3577,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x64_pf3E,0.0%,95.0934,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,181.4511,0,106.52,3345.95, -256,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,89.1708,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x64_pf3E,0.0%,112.8318,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,202.0026,0,191.36,3021.11, -256,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,92.5497,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,158.3452,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,250.8949,0,308.13,2457.45, -256,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,100.2236,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,60.6688,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,160.8924,0,17.52,8763.46, -256,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,139.0921,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf2E,0.0%,81.0365,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,220.1286,0,25.61,6408.36, -256,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,153.5346,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf2E,0.0%,92.7369,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,246.2715,0,45.78,5733.67, -256,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,159.5203,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,103.4399,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,262.9602,0,85.75,5380.25, -256,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,160.4127,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf2E,0.0%,132.4101,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,292.82280000000003,0,154.01,4850.36, -256,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,162.8098,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,221.1124,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,383.9222,0,234.93,3728.12, -256,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,269.3232,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.7722,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,404.0954,0,6.98,13951.17, -256,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,269.3232,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.7722,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,404.0954,0,6.98,13951.17, -256,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,269.3232,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.7722,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,404.0954,0,6.98,13951.17, -256,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,269.3232,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.7722,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,404.0954,0,6.98,13951.17, -256,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,269.3232,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.7722,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,404.0954,0,6.98,13951.17, -256,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,381.0416,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,195.7302,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,576.7718,0,9.77,9775.2, -256,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,562.6212,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,271.5572,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,834.1784,0,13.52,6759.92, -256,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,610.7903,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,311.8041,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,922.5944,0,24.44,6114.08, -256,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,616.9613,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,321.0372,moe_ck2stages_gemm2_256x32x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,937.9985,0,48.08,6017.58, -256,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,622.7788,moe_ck2stages_gemm1_256x64x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,341.9723,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,964.7511,0,93.49,5858.32, -256,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.2553,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.4126,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1020.6679,0,176.74,5551.76, -256,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.7102,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,144.3167,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.0269,0,244.63,3875.42, -256,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.0987,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,42.6821,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,92.7808,0,13.02,6511.87, -256,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.0987,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,42.6821,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,92.7808,0,13.02,6511.87, -256,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.0987,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,42.6821,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,92.7808,0,13.02,6511.87, -256,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.0987,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,42.6821,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,92.7808,0,13.02,6511.87, -256,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,50.0987,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,42.6821,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,92.7808,0,13.02,6511.87, -256,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,78.873,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,54.6731,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,133.5461,0,18.09,4525.58, -256,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,82.7604,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,60.2229,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,142.9833,0,33.79,4229.63, -256,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,83.8868,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,61.8545,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,145.7413,0,66.31,4154.98, -256,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,84.7124,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,66.4881,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.2005,0,127.83,4015.37, -256,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,86.6188,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.3428,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,169.96159999999998,0,227.43,3590.64, -256,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.6237,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,125.9531,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,218.5768,0,353.69,2820.81, -256,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.0702,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,41.1541,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.2243,0,13.1,6551.16, -256,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.0702,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,41.1541,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.2243,0,13.1,6551.16, -256,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.0702,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,41.1541,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.2243,0,13.1,6551.16, -256,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.0702,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,41.1541,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.2243,0,13.1,6551.16, -256,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,51.0702,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,41.1541,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,92.2243,0,13.1,6551.16, -256,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,81.6357,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,52.029,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,133.6647,0,18.07,4521.56, -256,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,84.8817,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,56.8257,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,141.7074,0,34.1,4267.71, -256,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,86.191,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,58.783,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,144.974,0,66.66,4176.97, -256,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,86.7569,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,64.6359,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,151.39280000000002,0,127.66,4010.27, -256,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,88.2869,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,82.4745,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,170.7614,0,226.37,3573.82, -256,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.1182,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,125.1912,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.3094,0,354.13,2824.26, -256,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.4501,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x192E,0.0%,0.0,Null,0,76.4501,1,15.8,7902.89, -256,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.4501,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x192E,0.0%,0.0,Null,0,76.4501,1,15.8,7902.89, -256,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.4501,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x192E,0.0%,0.0,Null,0,76.4501,1,15.8,7902.89, -256,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.4501,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x192E,0.0%,0.0,Null,0,76.4501,1,15.8,7902.89, -256,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,76.4501,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x192E,0.0%,0.0,Null,0,76.4501,1,15.8,7902.89, -256,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,104.0048,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x192E,0.0%,0.0,Null,0,104.0048,1,23.23,5811.01, -256,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,117.2665,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x192E,0.0%,0.0,Null,0,117.2665,1,41.2,5157.2, -256,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,118.1045,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x192E,0.0%,0.0,Null,0,118.1045,1,81.82,5127.26, -256,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,122.2554,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_ps_32x192E,0.0%,0.0,Null,0,122.2554,1,158.09,4966.04, -256,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,148.475,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x384E,0.0%,0.0,Null,0,148.475,1,260.34,4110.26, -256,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,92.8309,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.4309,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,252.2618,0,306.46,2444.14, -256,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.6325,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.2871,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.9196,0,10.6,5303.53, -256,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.6325,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.2871,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.9196,0,10.6,5303.53, -256,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.6325,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.2871,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.9196,0,10.6,5303.53, -256,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.6325,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.2871,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.9196,0,10.6,5303.53, -256,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,49.6325,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,64.2871,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,113.9196,0,10.6,5303.53, -256,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,80.9233,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x64_pf3E,0.0%,84.6624,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,165.5857,0,14.59,3649.91, -256,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,84.5381,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x64_pf3E,0.0%,88.2359,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,172.774,0,27.97,3500.33, -256,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,85.2495,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x64_pf3E,0.0%,90.1821,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,175.4316,0,55.09,3451.79, -256,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,86.7616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x64_pf3E,0.0%,94.7695,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,181.5311,0,106.47,3344.47, -256,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,89.8093,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x64_pf3E,0.0%,111.8215,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,201.6308,0,191.71,3026.68, -256,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,94.4833,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,162.4132,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,256.8965,0,300.94,2400.04, -80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, -80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, -80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, -80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, -80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, -80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,367.326,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,243.7681,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,611.0941,0,4.61,4613.84, -80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,485.1197,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,319.3392,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,804.4589000000001,0,7.01,3505.97, -80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,560.9087,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,389.4697,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,950.3784,0,11.86,2969.6, -80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,588.4224,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,400.0835,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,988.5059,0,22.81,2858.77, -80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,617.8863,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,423.4145,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1041.3008,0,43.31,2720.88, -80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, -80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, -80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, -80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, -80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, -80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, -80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,376.8336,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,233.1548,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,609.9884,0,4.62,4622.2, -80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,484.4582,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,310.4899,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,794.9481,0,7.09,3547.91, -80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,560.7307,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,356.8918,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,917.6225,0,12.29,3075.6, -80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,591.6172,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,377.301,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,968.9182,0,23.27,2916.56, -80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,621.3374,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,402.5935,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1023.9309,0,44.04,2767.03, -80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, -80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, -80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, -80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, -80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, -80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, -80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,194.485,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,176.724,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,371.209,0,7.59,3798.33, -80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,251.2934,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,233.6045,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,484.8979,0,11.63,2909.19, -80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,292.2127,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,265.0419,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,557.2546,0,20.23,2533.92, -80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,300.6316,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,279.9782,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,580.6098,0,38.84,2436.73, -80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,312.6244,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,296.8907,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,609.5151,0,73.99,2330.21, -80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, -80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, -80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, -80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, -80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, -80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, -80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,188.7522,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,170.2119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,358.96410000000003,0,7.85,3927.9, -80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,254.6539,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.1%,220.0918,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,474.7457,0,11.87,2971.41, -80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,290.9005,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,254.8242,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,545.7247,0,20.66,2587.46, -80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,299.2146,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,268.1991,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,567.4137000000001,0,39.74,2493.4, -80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,316.0831,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,287.2443,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,603.3274,0,74.75,2354.11, -80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, -80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, -80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, -80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, -80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, -80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, -80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,714.79,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,400.2169,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,1115.0069,0,5.06,5056.53, -80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,998.4164,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,561.412,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,1559.8284,0,7.23,3615.13, -80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1118.5366,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,643.0011,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1761.5377,0,12.8,3202.21, -80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1174.4719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,676.1183,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1850.5902,0,24.37,3050.1, -80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1216.8417,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,726.8006,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1943.6423,0,46.4,2907.85, -80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,366.9854,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,263.1156,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,630.101,0,8.95,4474.3, -80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,504.1748,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,378.662,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,882.8368,0,12.77,3194.19, -80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,570.6719,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,417.051,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,987.7229,0,22.83,2856.39, -80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,597.6775,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,432.5419,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1030.2194,0,43.77,2741.24, -80,1,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, -80,2,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, -80,4,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, -80,8,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, -80,16,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, -80,32,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,116.9005,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,89.872,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,206.7725,0,5.84,2923.52, -80,64,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,130.2259,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,101.7908,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,232.0167,0,10.41,2607.69, -80,128,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,134.8012,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,104.118,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,238.9192,0,20.22,2536.74, -80,256,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,139.8396,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,107.8712,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,247.7108,0,39.01,2455.18, -80,512,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,176.0293,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,152.7806,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,328.8099,0,58.78,1862.38, -80,1024,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, -80,1,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.5875,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,61.2038,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,149.7913,0,4.03,4033.89, +80,4,2304,1536,8,2,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,17.6606,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,15.126,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.3%,32.7866,0,5.18,2591.37, +80,4,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,22.6085,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,21.8513,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,44.4598,0,3.4,6793.53, +80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,39.3207,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,12.9446,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.2%,52.2653,0,1.69,26964.5, +80,1,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,39.59,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,13.014,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.2%,52.604,0,1.88,26895.53, +80,8,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,26.7393,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,30.591,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,57.3303,0,5.27,5269.26, +80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,40.8689,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,18.1489,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.2%,59.0178,0,2.98,23879.73, +80,2,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,41.4577,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,19.353,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.2%,60.8107,0,3.26,23266.2, +80,8,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, +80,16,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, +80,4,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, +80,1,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, +80,2,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, +80,1,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, +80,16,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, +80,4,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, +80,2,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, +80,8,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, +80,1,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, +80,4,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, +80,8,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, +80,2,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, +80,16,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, +80,1,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, +80,8,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, +80,2,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, +80,16,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, +80,4,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, +80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,46.2328,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.7%,28.8818,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,75.1146,0,4.69,18762.96, +80,4,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,48.5685,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,32.2906,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,80.8591,0,4.9,17498.06, +80,16,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,44.9474,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,44.429,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,89.3764,0,6.76,3381.05, +80,32,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,59.0984,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,45.8963,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,104.9947,0,5.75,2878.74, +80,32,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,59.5728,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,45.8094,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,105.3822,0,5.73,2868.15, +80,32,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,58.4576,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,49.4407,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,107.8983,0,5.6,2801.27, +80,32,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,58.5697,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,49.5953,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,108.165,0,5.58,2794.36, +80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,64.2093,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,1.8%,45.801,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,110.0103,0,6.41,12812.06, +80,64,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,62.2801,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,51.218,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,113.4981,0,10.64,2665.37, +80,64,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,62.4816,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,51.5263,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,114.0079,0,10.6,2653.45, +80,128,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,64.8002,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,52.9275,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,117.7277,0,20.52,2574.06, +80,64,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,62.2694,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,55.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,117.7707,0,10.26,2568.67, +80,64,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,62.6056,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,55.5345,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,118.1401,0,10.22,2560.64, +80,128,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,65.477,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,53.4665,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,118.9435,0,20.31,2547.75, +80,256,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,65.5874,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,55.6592,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,121.2466,0,39.85,2508.0, +80,128,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,63.8996,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,57.8423,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,121.7419,0,19.84,2489.19, +80,128,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,64.7876,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,57.4819,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,122.2695,0,19.76,2478.45, +80,256,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,68.4938,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,55.1259,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,123.6197,0,39.09,2459.86, +80,8,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,72.5315,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,52.4907,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,125.0222,0,6.34,11317.7, +80,256,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,65.3992,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,59.9428,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,125.34199999999998,0,38.55,2426.06, +80,256,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,66.8949,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,59.7486,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,126.64350000000002,0,38.15,2401.13, +80,32,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,130.6754,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,130.6754,1,9.24,2314.0, +80,40,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,132.1174,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,132.1174,1,11.43,2289.49, +80,64,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,135.0415,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,135.0415,1,17.89,2242.1, +80,128,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,136.8626,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,136.8626,1,35.3,2218.01, +80,256,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,144.1889,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,144.1889,1,67.02,2116.22, +80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, +80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, +80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, +80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, +80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, 80,2,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.5875,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,61.2038,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,149.7913,0,4.03,4033.89, 80,4,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.5875,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,61.2038,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,149.7913,0,4.03,4033.89, -80,8,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.5875,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,61.2038,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,149.7913,0,4.03,4033.89, +80,1,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.5875,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,61.2038,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,149.7913,0,4.03,4033.89, 80,16,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.5875,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,61.2038,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,149.7913,0,4.03,4033.89, -80,32,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,116.7801,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,82.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,199.298,0,6.06,3033.17, -80,64,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,130.0463,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,93.2196,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,223.2659,0,10.82,2709.9, -80,128,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,133.8064,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,96.146,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,229.9524,0,21.01,2635.66, -80,256,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,136.6552,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,99.1414,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,235.7966,0,40.98,2579.23, -80,512,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,177.8056,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,140.1446,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,317.9502,0,60.79,1925.99, -80,1024,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, -80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, -80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, -80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, -80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, -80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, -80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,214.8572,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,129.0874,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,343.94460000000004,0,7.02,3513.6, -80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.0028,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,151.5356,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,389.5384,0,12.4,3103.69, -80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,256.0,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.7118,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,411.7118,0,23.47,2939.09, -80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,268.9473,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,167.0488,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,435.9961,0,44.33,2780.19, -80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,347.7216,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,219.8661,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,567.5877,0,68.1,2143.01, -80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, -80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, -80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, -80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, -80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, -80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, -80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,216.9447,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,126.1828,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,343.1275,0,7.04,3521.97, -80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,246.22,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,145.7239,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,391.9439,0,12.33,3084.65, -80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,255.4375,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,152.2483,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,407.6858,0,23.7,2968.11, -80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,271.8789,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,161.456,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,433.3349,0,44.6,2797.27, -80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,347.2111,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,210.4569,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,557.668,0,69.31,2181.13, -80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, -80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,82.6308,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,68.157,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,150.7878,0,8.01,4006.8, -80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,82.6308,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,68.157,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,150.7878,0,8.01,4006.8, +80,8,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.5875,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,61.2038,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,149.7913,0,4.03,4033.89, +80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, +80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, +80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, +80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, +80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, +80,1,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, +80,4,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, +80,16,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, +80,8,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, +80,2,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, 80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,82.6308,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,68.157,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,150.7878,0,8.01,4006.8, -80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,82.6308,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,68.157,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,150.7878,0,8.01,4006.8, +80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,82.6308,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,68.157,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,150.7878,0,8.01,4006.8, 80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,82.6308,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,68.157,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,150.7878,0,8.01,4006.8, -80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,109.4253,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,88.2541,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,197.6794,0,12.22,3057.34, -80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,127.2465,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,102.2132,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,229.4597,0,21.06,2635.61, -80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,132.4286,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,105.8564,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,238.285,0,40.56,2541.3, -80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,134.0808,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,109.8052,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,243.886,0,79.25,2489.38, -80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,179.6681,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,155.5374,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,335.20550000000003,0,115.32,1820.59, -80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, -80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, -80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, -80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, -80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, -80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,83.7141,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,64.4684,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,148.1825,0,8.15,4077.25, -80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,109.0496,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,83.8951,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,192.9447,0,12.52,3132.36, -80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,127.6734,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,97.1704,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,224.8438,0,21.49,2689.72, -80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,129.4139,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,101.0604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,230.47430000000003,0,41.93,2627.42, -80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,134.708,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,105.5023,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,240.2103,0,80.46,2527.47, -80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,178.7076,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,147.3146,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,326.0222,0,118.56,1871.87, -80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, +80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,82.6308,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,68.157,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,150.7878,0,8.01,4006.8, +80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,82.6308,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,68.157,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,150.7878,0,8.01,4006.8, +80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, +80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, +80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, +80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, +80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, +80,1,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, +80,8,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, +80,16,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, +80,4,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, +80,2,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.0761,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,66.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,154.5774,0,3.91,3908.99, +80,2,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, +80,16,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, +80,4,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, +80,8,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, +80,1,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, +80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, +80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, +80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, +80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, +80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, +80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, +80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, +80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, +80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, +80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, +80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, +80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, +80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, +80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, +80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, 80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.7173,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.3%,76.402,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,159.1193,0,7.59,3797.0, -80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.7173,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.3%,76.402,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,159.1193,0,7.59,3797.0, -80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.7173,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.3%,76.402,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,159.1193,0,7.59,3797.0, 80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.7173,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.3%,76.402,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,159.1193,0,7.59,3797.0, +80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.7173,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.3%,76.402,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,159.1193,0,7.59,3797.0, 80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.7173,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.3%,76.402,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,159.1193,0,7.59,3797.0, -80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,109.8209,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,99.0655,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,208.8864,0,11.57,2893.31, -80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,119.69,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,114.5365,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,234.2265,0,20.63,2581.97, -80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,131.0244,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,119.5634,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,250.5878,0,38.56,2416.53, -80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,129.4978,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,126.7846,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,256.2824,0,75.41,2368.97, -80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,174.904,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,184.6529,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,359.5569,0,107.51,1697.29, -80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, -80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, -80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, -80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, -80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, -80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,82.1262,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,74.17,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,156.2962,0,7.73,3865.59, +80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.7173,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.3%,76.402,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,159.1193,0,7.59,3797.0, +80,512,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,94.7864,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,76.6968,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,171.4832,0,56.35,1785.51, +80,512,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,95.8693,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,76.473,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,172.3423,0,56.07,1776.61, +80,512,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,93.5238,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,83.188,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,176.71179999999998,0,54.69,1732.68, +80,512,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,94.7614,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,83.103,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,177.8644,0,54.33,1721.45, +80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,116.5659,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,74.8227,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,191.3886,0,7.36,7365.28, +80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,109.0496,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,83.8951,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,192.9447,0,12.52,3132.36, +80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,109.5369,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.7014,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,193.2383,0,12.5,3127.6, +80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,107.6661,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,87.4868,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,195.1529,0,12.38,3096.92, +80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,109.4253,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,88.2541,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,197.6794,0,12.22,3057.34, +80,32,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,116.7801,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,82.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,199.298,0,6.06,3033.17, +80,32,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,116.2949,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,83.4229,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,199.7178,0,6.05,3026.79, +80,16,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,122.8339,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,81.6731,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,204.507,0,7.75,6919.74, 80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,110.0096,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,95.6992,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,205.7088,0,11.74,2938.0, -80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,125.1757,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,111.2889,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,236.4646,0,20.43,2557.53, -80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,129.4724,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,116.1173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,245.5897,0,39.35,2465.71, -80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,136.164,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,123.0773,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,259.2413,0,74.55,2341.93, -80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,173.4458,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,178.424,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,351.8698,0,109.86,1734.37, -80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, -80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, -80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, -80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, -80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, -80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, -80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,217.0102,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,129.4305,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,346.4407,0,6.97,3488.28, -80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.1054,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,150.0225,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,391.1279,0,12.35,3091.08, -80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,256.7095,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,157.2573,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,413.9668,0,23.34,2923.08, -80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,265.3977,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,166.4318,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,431.8295,0,44.76,2807.02, -80,1,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, -80,2,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, -80,4,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, -80,8,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, -80,16,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,38.52,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,36.2602,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.78020000000001,0,4.04,4040.12, -80,32,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,58.4576,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,49.4407,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,107.8983,0,5.6,2801.27, -80,64,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,62.2694,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,55.5013,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,117.7707,0,10.26,2568.67, -80,128,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,64.7876,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,57.4819,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,122.2695,0,19.76,2478.45, -80,256,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,66.8949,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,59.7486,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,126.64350000000002,0,38.15,2401.13, -80,512,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,94.7614,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,83.103,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,177.8644,0,54.33,1721.45, -80,1024,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, -80,1,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, -80,2,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, -80,4,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, -80,8,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, -80,16,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.6753,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,33.9165,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.5918,0,4.16,4161.92, -80,32,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,59.5728,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,45.8094,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,105.3822,0,5.73,2868.15, -80,64,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,62.2801,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,51.218,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,113.4981,0,10.64,2665.37, -80,128,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,64.8002,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,52.9275,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,117.7277,0,20.52,2574.06, -80,256,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,68.4938,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,55.1259,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,123.6197,0,39.09,2459.86, -80,512,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,95.8693,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,76.473,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,172.3423,0,56.07,1776.61, -80,1024,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, -80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,191.3917,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,200.8431,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,392.2348,0,7.19,3594.72, -80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,243.4825,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,262.4499,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,505.9324,0,11.14,2788.24, -80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,287.1619,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,303.9818,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,591.1437000000001,0,19.07,2388.66, -80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,292.8938,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,319.6213,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,612.5151000000001,0,36.81,2309.81, -80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,305.4473,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,349.9039,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,655.3512000000001,0,68.81,2167.23, -80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, -80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, -80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, -80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, -80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, -80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, -80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,368.8396,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,244.9596,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,613.7992,0,4.59,4593.51, -80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,486.2218,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,330.374,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,816.5958,0,6.9,3453.86, -80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,569.5169,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.7784,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,942.2953,0,11.96,2995.07, -80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,587.3979,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,399.9114,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,987.3093,0,22.84,2862.24, -80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,619.5779,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,428.3394,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1047.9173,0,43.04,2703.7, -80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, -80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, -80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, -80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, -80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, -80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, -80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,368.433,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,237.6243,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,606.0572999999999,0,4.65,4652.18, -80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,493.7026,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,313.5552,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,807.2578000000001,0,6.98,3493.81, -80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,568.3004,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,363.255,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,931.5554,0,12.1,3029.6, -80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,593.7672,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,382.7183,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,976.4855,0,23.09,2893.96, -80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,617.3315,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,411.0096,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1028.3411,0,43.85,2755.17, -80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, -80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,118.5389,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,118.5512,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,237.0901,0,5.94,5945.55, +80,32,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,115.8935,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,90.1026,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,205.9961,0,5.86,2934.54, +80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,110.1287,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,96.0451,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,206.1738,0,11.72,2931.38, +80,32,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,116.9005,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,89.872,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,206.7725,0,5.84,2923.52, +80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, +80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, +80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, +80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, +80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, +80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, +80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, +80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, +80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, +80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, +80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, +80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, +80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, +80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, +80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, +80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, +80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, +80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, +80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, +80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, +80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,108.8976,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,99.2988,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,208.1964,0,11.6,2902.9, +80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,109.8209,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,99.0655,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,208.8864,0,11.57,2893.31, +80,512,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,211.5621,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,211.5621,1,91.36,1457.17, +80,64,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,128.528,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,93.9786,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,222.5066,0,10.86,2719.15, +80,64,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,130.0463,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,93.2196,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,223.2659,0,10.82,2709.9, +80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,127.6734,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,97.1704,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,224.8438,0,21.49,2689.72, +80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,125.0556,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,101.2628,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,226.3184,0,21.35,2672.19, +80,64,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,124.4518,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,102.1206,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,226.5724,0,10.66,2670.35, +80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,130.7478,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,96.7112,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,227.459,0,21.24,2658.79, +80,128,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,131.5616,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.8169,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,228.3785,0,21.16,2653.83, +80,56,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,228.7482,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.5%,0.0,Null,0.0%,228.7482,1,73.93,2644.88, +80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,127.2465,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,102.2132,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,229.4597,0,21.06,2635.61, +80,128,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,133.8064,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,96.146,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,229.9524,0,21.01,2635.66, +80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,129.4139,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,101.0604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,230.47430000000003,0,41.93,2627.42, +80,64,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,130.2259,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,101.7908,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,232.0167,0,10.41,2607.69, +80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,130.8193,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,102.1228,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,232.9421,0,41.49,2599.58, +80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, +80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, +80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, +80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, +80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,120.6124,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,113.2457,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,233.8581,0,6.03,6027.72, +80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,119.69,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,114.5365,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,234.2265,0,20.63,2581.97, +80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, +80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, +80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, +80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, +80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, +80,256,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,136.6552,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,99.1414,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,235.7966,0,40.98,2579.23, +80,128,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,131.7229,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,104.731,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,236.4539,0,20.43,2563.19, +80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,125.1757,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,111.2889,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,236.4646,0,20.43,2557.53, +80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,130.9247,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,105.8912,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,236.8159,0,40.81,2557.06, 80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,118.5389,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,118.5512,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,237.0901,0,5.94,5945.55, -80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,118.5389,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,118.5512,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,237.0901,0,5.94,5945.55, 80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,118.5389,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,118.5512,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,237.0901,0,5.94,5945.55, 80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,118.5389,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,118.5512,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,237.0901,0,5.94,5945.55, -80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,120.5717,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,117.6181,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,238.1898,0,5.92,5918.1, +80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,118.5389,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,118.5512,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,237.0901,0,5.94,5945.55, +80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,118.5389,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,118.5512,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,237.0901,0,5.94,5945.55, +80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,126.6253,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,111.3536,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,237.9789,0,20.3,2541.26, +80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,120.5717,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,117.6181,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,238.1898,0,5.92,5918.1, 80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,120.5717,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,117.6181,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,238.1898,0,5.92,5918.1, -80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,120.5717,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,117.6181,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,238.1898,0,5.92,5918.1, 80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,120.5717,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,117.6181,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,238.1898,0,5.92,5918.1, -80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,120.5717,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,117.6181,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,238.1898,0,5.92,5918.1, -80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,188.5396,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,176.4776,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,365.0172,0,7.72,3862.76, -80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,252.7997,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,230.1231,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,482.9228,0,11.67,2921.09, -80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,278.3578,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,264.9256,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,543.2834,0,20.75,2599.08, -80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,301.0401,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,279.1768,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,580.2169,0,38.86,2438.38, -80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,314.4244,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,297.6385,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,612.0629,0,73.68,2320.51, -80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, -80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, -80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, -80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, -80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, -80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,121.9866,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,113.0687,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,235.0553,0,6.0,5997.02, -80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,191.3402,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,169.9426,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,361.2828,0,7.8,3902.69, -80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,253.0207,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,220.8733,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,473.894,0,11.9,2976.75, -80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,290.3441,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,254.7127,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,545.0568000000001,0,20.68,2590.63, -80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,300.1714,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,268.3368,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,568.5082,0,39.66,2488.6, -80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,304.03,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,287.1332,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,591.1632,0,76.29,2402.55, -80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, -80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, -80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, -80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, -80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, -80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, -80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, -80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,369.2362,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,241.8314,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.9%,611.0676,0,9.23,4613.66, -80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,510.6484,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,335.3164,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,845.9648,0,13.33,3333.41, -80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,582.0633,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,408.0174,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,990.0807,0,22.77,2849.59, -80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,615.4904,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,418.179,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1033.6694,0,43.63,2732.09, -80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, -80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, -80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, -80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, -80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, -80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, -80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,579.7023,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.1%,364.33,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,944.0323,0,23.89,2988.59, -80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,601.7144,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,402.2826,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1003.997,0,44.92,2812.83, -80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,625.392,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,411.3256,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1036.7176,0,87.0,2729.37, -80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, -80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, -80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, -80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, -80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, -80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, -80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,618.0944,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,476.0445,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1094.1389,0,82.43,2586.13, -80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, -80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, -80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, -80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, -80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, -80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, -80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,367.8369,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,260.1737,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,628.0106000000001,0,8.98,4489.19, -80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,495.9422,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,361.1859,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,857.1281,0,13.15,3290.0, -80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,571.3622,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,410.8419,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,982.2041,0,22.96,2872.44, -80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,595.5339,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,429.8353,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1025.3692,0,43.98,2754.21, -80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,617.4891,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,481.3596,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1098.8487,0,82.08,2575.04, -80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, -80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,351.9236,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,220.5397,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,572.4633,0,67.52,2124.76, -80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, -80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, -80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, -80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, -80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, -80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, -80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,209.4214,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,126.4843,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,335.9057,0,7.19,3597.69, -80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,240.9494,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,148.9541,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,389.9035,0,12.39,3100.79, -80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,257.2833,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,157.5746,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,414.8579,0,23.29,2916.8, -80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,269.3657,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,164.1089,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,433.4746,0,44.59,2796.37, -80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,352.9213,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,213.1932,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,566.1144999999999,0,68.28,2148.59, -80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, -80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, -80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, -80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, -80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, -80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,83.611,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,67.927,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,151.538,0,7.97,3986.96, -80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,107.6661,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,87.4868,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,195.1529,0,12.38,3096.92, -80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,125.0556,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,101.2628,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,226.3184,0,21.35,2672.19, -80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,130.9247,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,105.8912,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,236.8159,0,40.81,2557.06, -80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,135.4179,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,109.748,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,245.1659,0,78.83,2476.39, -80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,177.8313,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,155.4294,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,333.2607,0,115.99,1831.21, -80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, -80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, -80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, -80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, -80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, -80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,84.3415,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,65.6237,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,149.96519999999998,0,8.05,4028.78, -80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,109.5369,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.7014,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,193.2383,0,12.5,3127.6, -80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,130.7478,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,96.7112,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,227.459,0,21.24,2658.79, -80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,130.8193,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,102.1228,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,232.9421,0,41.49,2599.58, +80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,120.5717,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,117.6181,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,238.1898,0,5.92,5918.1, +80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,120.5717,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,117.6181,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,238.1898,0,5.92,5918.1, +80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,132.4286,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,105.8564,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,238.285,0,40.56,2541.3, +80,128,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,134.8012,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,104.118,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,238.9192,0,20.22,2536.74, +80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, +80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, +80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, +80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, +80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,121.6233,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,117.3629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,238.9862,0,5.9,5898.37, +80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,134.708,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,105.5023,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,240.2103,0,80.46,2527.47, 80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,134.1437,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,106.1464,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,240.2901,0,80.43,2526.64, -80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,179.8699,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,146.8478,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,326.71770000000004,0,118.31,1867.89, -80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, -80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, -80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, -80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, -80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, -80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,82.4918,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,76.3624,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,158.8542,0,7.6,3803.34, -80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,108.8976,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,99.2988,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,208.1964,0,11.6,2902.9, 80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,126.1647,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,114.79,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,240.9547,0,20.05,2509.88, +80,256,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,140.9059,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,100.6281,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,241.534,0,40.01,2517.96, +80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,134.0808,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,109.8052,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,243.886,0,79.25,2489.38, 80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,124.3466,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,119.6476,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,243.9942,0,39.61,2481.83, -80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,136.6099,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,127.3005,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,263.9104,0,73.23,2300.5, -80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,174.3438,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,184.6666,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,359.0104,0,107.67,1699.87, -80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, -80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, -80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, -80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, -80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, -80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,84.9173,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,74.1307,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,159.048,0,7.59,3798.7, -80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,110.1287,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,96.0451,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,206.1738,0,11.72,2931.38, -80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,126.6253,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,111.3536,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,237.9789,0,20.3,2541.26, +80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,135.4179,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,109.748,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,245.1659,0,78.83,2476.39, +80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,129.4724,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,116.1173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,245.5897,0,39.35,2465.71, +80,256,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,139.1257,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,108.5817,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,247.7074,0,39.01,2455.21, +80,256,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,139.8396,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,107.8712,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,247.7108,0,39.01,2455.18, 80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,132.4651,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,116.0108,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,248.4759,0,38.89,2437.07, +80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,131.0244,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,119.5634,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,250.5878,0,38.56,2416.53, +80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, +80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, +80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, +80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, +80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, 80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,130.1837,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,123.1176,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,253.3013,0,76.3,2396.85, -80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,177.7956,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,178.1446,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,355.9402,0,108.6,1714.53, -80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, -80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,120.3829,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,134.4734,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,254.8563,0,5.53,5531.08, +80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,120.3829,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,134.4734,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,254.8563,0,5.53,5531.08, 80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,120.3829,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,134.4734,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,254.8563,0,5.53,5531.08, +80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,120.3829,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,134.4734,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,254.8563,0,5.53,5531.08, 80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,120.3829,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,134.4734,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,254.8563,0,5.53,5531.08, -80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,120.3829,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,134.4734,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,254.8563,0,5.53,5531.08, 80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,120.3829,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf3E,0.0%,134.4734,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,254.8563,0,5.53,5531.08, -80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,188.6312,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,205.9065,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,394.5377,0,7.14,3573.74, -80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,249.8568,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf2E,0.0%,269.7859,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,519.6427,0,10.85,2714.68, -80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,285.1047,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,312.3733,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,597.478,0,18.87,2363.33, -80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,302.8986,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,328.0652,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,630.9638,0,35.74,2242.27, -80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,305.7627,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,359.7434,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,665.5061000000001,0,67.76,2134.16, -80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, -80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, -80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, -80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, -80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, -80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,119.4832,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,131.8606,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,251.3438,0,5.61,5608.37, -80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,192.247,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,200.7748,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,393.0218,0,7.17,3587.52, -80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,247.1864,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,262.0145,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,509.2009,0,11.07,2770.35, -80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,280.8024,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,303.8392,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,584.6415999999999,0,19.28,2415.22, -80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,301.3686,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,321.4971,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,622.8657000000001,0,36.2,2271.42, -80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,310.6611,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,349.0712,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,659.7322999999999,0,68.36,2152.84, -80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, -80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, -80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, -80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, -80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, -80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, -80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, -80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,708.8776,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,395.3054,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1104.183,0,5.11,5106.09, -80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1025.0488,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,559.4474,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1584.4962,0,7.12,3558.85, -80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1128.3663,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,636.7004,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1765.0667,0,12.77,3195.81, -80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1186.9346,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,673.7193,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1860.6539,0,24.24,3033.6, -80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1257.2784,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,722.716,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1979.9944,0,45.55,2854.46, -80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,625.1681,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf2E,0.0%,436.7729,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1061.941,0,84.93,2664.54, -80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,369.4651,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,236.5581,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,606.0232,0,9.3,4652.07, -80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,516.6341,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,344.0188,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,860.6529,0,13.1,3276.52, -80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, -80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, -80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, -80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, -80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, -80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,714.05,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,401.6611,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,1115.7111,0,5.05,5053.34, -80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,985.8866,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,559.8234,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.5%,1545.71,0,7.29,3648.15, -80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1134.7112,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,643.4691,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1778.1803,0,12.68,3172.24, -80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1149.8246,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,670.1762,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1820.0008,0,24.78,3101.36, -80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1256.2287,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,729.0988,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1985.3275,0,45.43,2846.8, -80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, -80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, -80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, -80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, -80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, -80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, -80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,712.3917,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,389.8042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1102.1959,0,5.11,5115.3, -80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,979.7129,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,568.5654,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1548.2783,0,7.28,3642.1, -80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1135.1695,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,627.8855,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1763.0549999999998,0,12.79,3199.45, -80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1171.5851,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,662.7687,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1834.3538,0,24.58,3077.1, -80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,27.2341,moe_ck2stages_gemm1_256x128x128x128_1x4_TypeCastExpertWeight_v3_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,1749.6879,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1776.9219999999998,0,50.76,3180.68, -80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, -80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, -80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, -80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, -80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, -80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, -80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,372.7495,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,246.5629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.5%,619.3124,0,9.1,4552.24, -80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,502.1489,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,339.9999,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,842.1488,0,13.39,3348.52, -80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,586.3089,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,394.6809,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,980.9898,0,22.99,2876.0, -80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,591.8466,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,398.0473,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,989.8939,0,45.56,2852.91, -80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,626.1058,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf2E,0.0%,442.0281,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1068.1339,0,84.44,2649.09, -80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, -80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,227.7593,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,155.545,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,383.3043,0,7.35,7354.25, -80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,227.7593,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,155.545,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,383.3043,0,7.35,7354.25, -80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,227.7593,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,155.545,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,383.3043,0,7.35,7354.25, +80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,129.4978,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,126.7846,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,256.2824,0,75.41,2368.97, +80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, +80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, +80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, +80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, +80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,160.613,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,97.024,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,257.637,0,4.69,4689.63, +80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, +80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, +80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, +80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, +80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,161.7277,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.3101,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,258.0378,0,4.68,4682.34, +80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,136.164,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,123.0773,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,259.2413,0,74.55,2341.93, +80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, +80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, +80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, +80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, +80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,165.7381,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,94.3172,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,260.0553,0,4.65,4646.02, +80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, +80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, +80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, +80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, +80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,165.0797,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,96.9624,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,262.0421,0,4.61,4610.79, +80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,136.6099,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,127.3005,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,263.9104,0,73.23,2300.5, +80,16384,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, +80,2048,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, +80,8192,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, +80,4096,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, +80,32768,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, +80,1024,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, +80,16384,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, +80,2048,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, +80,4096,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, +80,1024,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, +80,32768,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, +80,8192,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, +80,2048,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, +80,1024,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, +80,16384,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, +80,8192,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, +80,4096,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, +80,32768,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, +80,4096,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, +80,32768,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, +80,8192,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, +80,1024,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, +80,16384,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, +80,2048,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, +80,1024,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,290.2352,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,290.2352,1,133.18,1083.85, +80,512,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,176.6934,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,140.5842,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,317.2776,0,60.92,1930.07, +80,512,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,177.8056,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,140.1446,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,317.9502,0,60.79,1925.99, +80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,192.7713,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,6.2%,126.9169,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,319.6882,0,8.82,4410.47, +80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,178.7076,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,147.3146,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,326.0222,0,118.56,1871.87, +80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,179.8699,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,146.8478,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,326.71770000000004,0,118.31,1867.89, +80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,198.0641,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,130.4169,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,328.481,0,8.58,4292.41, +80,512,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,176.0293,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,152.7806,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,328.8099,0,58.78,1862.38, +80,512,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,177.2587,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,152.9295,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,330.1882,0,58.53,1854.6, +80,32,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,201.5918,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,7.5%,129.3064,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,330.8982,0,9.58,4277.69, +80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,204.1789,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,127.8428,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,332.0217,0,8.49,4246.63, +80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,177.8313,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,155.4294,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,333.2607,0,115.99,1831.21, +80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,204.8947,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,128.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,333.8305,0,8.44,4223.62, +80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,205.3401,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,128.7497,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,334.0898,0,8.44,4220.35, +80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,179.6681,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,155.5374,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,335.20550000000003,0,115.32,1820.59, +80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,209.4214,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,126.4843,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,335.9057,0,7.19,3597.69, +80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,216.9447,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,126.1828,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,343.1275,0,7.04,3521.97, +80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,214.8572,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,129.0874,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,343.94460000000004,0,7.02,3513.6, +80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,217.0102,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,129.4305,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,346.4407,0,6.97,3488.28, +80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,173.4458,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,178.424,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,351.8698,0,109.86,1734.37, +80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,177.7956,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,178.1446,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,355.9402,0,108.6,1714.53, +80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,188.7522,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,170.2119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,358.96410000000003,0,7.85,3927.9, +80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,174.3438,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,184.6666,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,359.0104,0,107.67,1699.87, +80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,174.904,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,184.6529,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,359.5569,0,107.51,1697.29, +80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,191.3402,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,169.9426,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,361.2828,0,7.8,3902.69, +80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,188.5396,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,176.4776,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,365.0172,0,7.72,3862.76, +80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,194.485,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,176.724,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,371.209,0,7.59,3798.33, +80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, +80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, +80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, +80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, +80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, +80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, +80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, +80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, +80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, +80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, +80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, +80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, +80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, +80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, +80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, +80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, +80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, +80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, +80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, +80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, 80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,227.7593,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,155.545,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,383.3043,0,7.35,7354.25, 80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,227.7593,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,155.545,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,383.3043,0,7.35,7354.25, -80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,370.0464,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,232.0003,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,602.0467,0,9.36,4682.79, -80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,509.0501,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,326.8678,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,835.9178999999999,0,13.49,3373.48, -80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,583.1963,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.1%,372.1123,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,955.3086,0,23.6,2953.31, -80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,610.1172,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,408.2756,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1018.3928,0,44.28,2773.07, -80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,624.0096,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf2E,0.0%,411.5878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1035.5974,0,87.09,2732.32, -80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, -80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, -80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, -80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, -80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, -80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, -80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,363.3125,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,265.3606,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,628.6731,0,8.97,4484.46, -80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,507.5861,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,362.0567,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,869.6428,0,12.96,3242.65, -80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,571.3867,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,421.5456,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,992.9323,0,22.71,2841.41, -80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,600.7597,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,432.403,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1033.1627,0,43.65,2733.43, -80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,620.397,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,467.8512,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1088.2482,0,82.88,2600.13, -80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, -80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,230.2993,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,169.4253,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,399.7246,0,7.05,7052.15, +80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,227.7593,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,155.545,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,383.3043,0,7.35,7354.25, +80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,227.7593,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,155.545,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,383.3043,0,7.35,7354.25, +80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,227.7593,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,155.545,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,383.3043,0,7.35,7354.25, +80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, +80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, +80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, +80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, +80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,238.507,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,146.2522,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,384.7592,0,3.66,7326.74, +80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, +80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, +80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, +80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, +80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,231.5308,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,154.9071,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,386.4379,0,7.29,7294.62, +80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, +80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, +80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, +80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, +80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,239.8912,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,147.6741,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,387.5653,0,3.64,7273.69, +80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, +80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, +80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, +80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, +80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,230.2686,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,158.8229,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,389.0915,0,7.24,7244.87, +80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.0028,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,151.5356,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.3%,389.5384,0,12.4,3103.69, +80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,240.9494,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,148.9541,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,389.9035,0,12.39,3100.79, +80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.1054,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,150.0225,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,391.1279,0,12.35,3091.08, +80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,246.22,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,145.7239,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,391.9439,0,12.33,3084.65, +80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,191.3917,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,200.8431,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,392.2348,0,7.19,3594.72, +80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,192.247,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,200.7748,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,393.0218,0,7.17,3587.52, +80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, +80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, +80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, +80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, +80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,238.4483,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.017,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,393.4653,0,3.58,7164.62, +80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,188.6312,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,205.9065,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,394.5377,0,7.14,3573.74, +80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, +80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, +80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, +80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, +80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,237.9312,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,159.3483,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,397.2795,0,7.09,7095.55, +80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, +80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, +80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, +80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, +80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,241.3413,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,156.5265,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.7%,397.8678,0,3.54,7085.35, +80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,230.2993,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,169.4253,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,399.7246,0,7.05,7052.15, 80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,230.2993,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,169.4253,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,399.7246,0,7.05,7052.15, 80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,230.2993,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,169.4253,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,399.7246,0,7.05,7052.15, -80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,230.2993,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,169.4253,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,399.7246,0,7.05,7052.15, 80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,230.2993,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,169.4253,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,399.7246,0,7.05,7052.15, -80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,363.9112,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,259.8372,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,623.7484,0,9.04,4519.87, -80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,510.4385,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,361.3331,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,871.7716,0,12.93,3234.73, -80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,570.8776,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,416.9556,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,987.8332,0,22.83,2856.07, -80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,591.7744,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,430.4532,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1022.2276,0,44.12,2762.67, -80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,619.0301,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf2E,0.0%,468.3093,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1087.3393999999998,0,82.95,2602.3, -80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, -80,1,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, -80,2,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, -80,4,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, -80,8,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, -80,16,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,88.9501,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,66.7261,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,155.6762,0,3.88,3881.4, -80,32,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,115.8935,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,90.1026,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,205.9961,0,5.86,2934.54, -80,64,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,124.4518,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,102.1206,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,226.5724,0,10.66,2670.35, -80,128,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,131.7229,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,104.731,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,236.4539,0,20.43,2563.19, -80,256,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,139.1257,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,108.5817,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,247.7074,0,39.01,2455.21, -80,512,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,177.2587,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,152.9295,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,330.1882,0,58.53,1854.6, -80,1024,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, -80,1,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, -80,2,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, -80,4,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, -80,8,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, -80,16,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,88.3641,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,61.6683,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,150.0324,0,4.03,4027.41, -80,32,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,116.2949,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,83.4229,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,199.7178,0,6.05,3026.79, -80,64,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,128.528,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,93.9786,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,222.5066,0,10.86,2719.15, -80,128,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,131.5616,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,96.8169,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,228.3785,0,21.16,2653.83, -80,256,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,140.9059,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,100.6281,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,241.534,0,40.01,2517.96, -80,512,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,176.6934,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,140.5842,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,317.2776,0,60.92,1930.07, -80,1024,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, -80,1,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, -80,2,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, -80,4,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, -80,8,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, -80,16,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,37.5796,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,36.5426,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,74.12219999999999,0,4.07,4075.98, -80,32,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,58.5697,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,49.5953,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,108.165,0,5.58,2794.36, -80,64,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,62.6056,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,55.5345,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,118.1401,0,10.22,2560.64, -80,128,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,63.8996,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,57.8423,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,121.7419,0,19.84,2489.19, -80,256,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,65.3992,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,59.9428,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,125.34199999999998,0,38.55,2426.06, -80,512,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,93.5238,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,83.188,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,176.71179999999998,0,54.69,1732.68, -80,1024,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, -80,1,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, -80,2,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, -80,4,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, -80,8,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, -80,16,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,38.1892,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,34.1814,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,72.3706,0,4.17,4174.64, -80,32,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,59.0984,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,45.8963,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,104.9947,0,5.75,2878.74, -80,64,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,62.4816,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,51.5263,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,114.0079,0,10.6,2653.45, -80,128,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,65.477,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,53.4665,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,118.9435,0,20.31,2547.75, -80,256,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,65.5874,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,55.6592,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,121.2466,0,39.85,2508.0, -80,512,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,94.7864,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,76.6968,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,171.4832,0,56.35,1785.51, -80,1024,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, -256,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,71.439,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,41.4885,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,112.9275,0,12.48,12482.61, -256,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,71.439,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,41.4885,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,112.9275,0,12.48,12482.61, -256,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,71.439,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,41.4885,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,112.9275,0,12.48,12482.61, -256,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,71.439,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,41.4885,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,112.9275,0,12.48,12482.61, -256,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,71.439,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,41.4885,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,112.9275,0,12.48,12482.61, -256,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,120.1868,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,59.0038,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,179.1906,0,15.73,7868.57, -256,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,141.1318,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,79.7079,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,220.8397,0,25.53,6387.72, -256,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,156.0506,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,92.4632,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,248.5138,0,45.37,5681.93, -256,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,161.4495,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,118.1586,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,279.6081,0,80.64,5059.91, -256,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,203.4884,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,212.7151,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,416.2035,0,108.35,3412.5, -256,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,328.7665,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,403.3563,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,732.1228,0,123.2,1955.01, -256,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.08,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,71.7868,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,16.0%,208.8668,0,13.49,13496.24, -256,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.08,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,71.7868,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,16.0%,208.8668,0,13.49,13496.24, -256,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.08,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,71.7868,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,16.0%,208.8668,0,13.49,13496.24, -256,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.08,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,71.7868,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,16.0%,208.8668,0,13.49,13496.24, -256,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.08,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,71.7868,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,16.0%,208.8668,0,13.49,13496.24, -256,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,205.7119,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,103.7369,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,16.0%,309.4488,0,18.22,9110.59, -256,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,282.9681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,143.4641,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.9%,426.4322,0,26.44,6612.89, -256,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,313.4267,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,170.2943,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,483.721,0,46.61,5832.55, -256,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,318.0049,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,179.0098,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,497.0147,0,90.74,5682.08, -256,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,403.3586,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,227.8796,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,631.2382,0,142.88,4482.59, -256,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,553.5171,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,421.0368,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,974.5539,0,185.1,2914.76, -256,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.51,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,71.4507,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,208.9607,0,13.49,13490.17, -256,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.51,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,71.4507,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,208.9607,0,13.49,13490.17, -256,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.51,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,71.4507,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,208.9607,0,13.49,13490.17, -256,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.51,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,71.4507,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,208.9607,0,13.49,13490.17, -256,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,137.51,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,71.4507,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,208.9607,0,13.49,13490.17, -256,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,206.2526,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,103.5784,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.9%,309.831,0,18.19,9099.35, -256,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,282.8631,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,144.8538,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,427.7169,0,26.36,6593.03, -256,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,312.8646,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,169.9231,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,482.7877,0,46.7,5843.82, -256,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,315.2045,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,179.0137,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,494.2182,0,91.25,5714.23, -256,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,408.1562,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,228.0692,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,636.2254,0,141.76,4447.45, -256,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,572.8802,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,420.9959,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,993.8761,0,181.5,2858.1, -256,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,55.9955,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,45.9382,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,101.9337,0,11.85,5927.15, -256,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,55.9955,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,45.9382,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,101.9337,0,11.85,5927.15, -256,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,55.9955,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,45.9382,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,101.9337,0,11.85,5927.15, -256,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,55.9955,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,45.9382,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,101.9337,0,11.85,5927.15, -256,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,55.9955,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,45.9382,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,101.9337,0,11.85,5927.15, -256,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,81.7704,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,60.4356,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,142.206,0,16.99,4249.98, -256,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,86.8808,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,66.8218,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,153.7026,0,31.44,3934.65, -256,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,88.4556,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,86.688,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,175.1436,0,55.18,3457.46, -256,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,90.4642,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,141.6691,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,232.1333,0,83.26,2615.42, -256,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,95.7813,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,247.6126,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,343.3939,0,112.57,1777.18, -256,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,128.833,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,473.6369,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,602.4699,0,128.32,1023.39, -256,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,71.6188,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,41.3596,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,112.9784,0,12.47,12476.99, -256,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,71.6188,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,41.3596,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,112.9784,0,12.47,12476.99, -256,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,71.6188,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,41.3596,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,112.9784,0,12.47,12476.99, -256,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,71.6188,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,41.3596,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,112.9784,0,12.47,12476.99, -256,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,71.6188,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,41.3596,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,112.9784,0,12.47,12476.99, -256,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,120.1253,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,58.8622,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,178.9875,0,15.75,7877.5, -256,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,141.4796,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,79.0378,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,220.5174,0,25.56,6397.06, -256,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,155.8757,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,91.8804,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,247.7561,0,45.51,5699.31, -256,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,161.8062,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,117.8189,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,279.6251,0,80.64,5059.6, -256,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,203.3392,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,212.9202,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,416.2594,0,108.34,3412.05, -256,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,329.0517,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,400.2276,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,729.2793,0,123.68,1962.63, -256,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.1212,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,71.9542,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,209.0754,0,13.48,13482.77, -256,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.1212,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,71.9542,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,209.0754,0,13.48,13482.77, -256,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.1212,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,71.9542,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,209.0754,0,13.48,13482.77, -256,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.1212,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,71.9542,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,209.0754,0,13.48,13482.77, -256,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.1212,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,71.9542,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,209.0754,0,13.48,13482.77, -256,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,205.6879,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,103.7835,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,309.4714,0,18.22,9109.92, -256,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,280.6797,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,145.7947,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,426.4744,0,26.44,6612.23, -256,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,313.642,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,168.7999,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,482.4419,0,46.74,5848.01, -256,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,316.5168,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,179.9899,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,496.5067,0,90.83,5687.89, -256,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,403.1043,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,229.7632,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,632.8675,0,142.52,4471.05, -256,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,553.6587,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,425.3929,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,979.0516,0,184.25,2901.37, -256,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.2707,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,71.8847,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,209.1554,0,13.48,13477.62, -256,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.2707,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,71.8847,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,209.1554,0,13.48,13477.62, -256,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.2707,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,71.8847,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,209.1554,0,13.48,13477.62, -256,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.2707,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,71.8847,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,209.1554,0,13.48,13477.62, -256,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,137.2707,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,71.8847,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,209.1554,0,13.48,13477.62, -256,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,205.8602,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,104.2026,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,310.0628,0,18.18,9092.55, -256,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,281.4413,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,145.7705,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,427.2118,0,26.39,6600.82, -256,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,314.0411,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,172.2239,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,486.265,0,46.37,5802.03, -256,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,314.8954,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,179.522,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,494.4174,0,91.21,5711.93, -256,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,406.507,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,230.6196,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,637.1266,0,141.56,4441.16, -256,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,575.7451,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,426.4277,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1002.1728,0,180.0,2834.43, -256,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,120.279,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,58.8942,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,179.1732,0,15.73,7869.34, -256,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,141.4809,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,80.3187,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,221.7996,0,25.42,6360.08, -256,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,154.7799,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,91.6297,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,246.4096,0,45.75,5730.45, -256,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,160.0658,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,117.833,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,277.8988,0,81.14,5091.03, -256,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,205.7498,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,212.5856,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,418.3354,0,107.8,3395.11, -256,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,327.5497,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,402.6535,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,730.2032,0,123.52,1960.15, -256,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,57.2319,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,44.9913,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,102.2232,0,11.82,5910.36, -256,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,57.2319,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,44.9913,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,102.2232,0,11.82,5910.36, -256,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,57.2319,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,44.9913,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,102.2232,0,11.82,5910.36, -256,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,57.2319,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,44.9913,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,102.2232,0,11.82,5910.36, -256,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,57.2319,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,44.9913,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,102.2232,0,11.82,5910.36, -256,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,83.8327,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,58.83,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,142.6627,0,16.93,4236.38, -256,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,88.5384,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,66.7218,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,155.2602,0,31.12,3895.18, -256,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,89.7371,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,86.6169,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,176.354,0,54.8,3433.73, -256,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,92.6716,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,141.8887,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,234.5603,0,82.4,2588.36, -256,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,95.6695,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,247.8364,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,343.5059,0,112.53,1776.6, -256,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,133.8828,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,474.349,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,608.2318,0,127.11,1013.7, -80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, -80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, -80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, -80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, -80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,123.5681,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,83.7681,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,207.3362,0,6.8,6798.77, -80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,204.1789,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,127.8428,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,332.0217,0,8.49,4246.63, +80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,230.2993,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,169.4253,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,399.7246,0,7.05,7052.15, +80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, +80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, +80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, +80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, +80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,228.7896,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,171.5575,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,400.3471,0,7.04,7041.18, +80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, +80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, +80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, +80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, +80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,231.8332,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,169.7631,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,401.5963,0,7.02,7019.28, +80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, +80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, +80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, +80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, +80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,231.6437,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,173.6438,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,405.2875,0,6.95,6955.35, +80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,255.4375,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,152.2483,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,407.6858,0,23.7,2968.11, +80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,256.0,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,155.7118,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,411.7118,0,23.47,2939.09, +80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,412.7488,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,412.7488,1,13.66,3417.73, +80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,256.7095,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,157.2573,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,413.9668,0,23.34,2923.08, +80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,257.2833,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,157.5746,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,414.8579,0,23.29,2916.8, +80,4,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, +80,2,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, +80,8,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, +80,16,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, +80,1,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, +80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,254.9635,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,170.7009,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,425.6644,0,13.24,3314.02, +80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,255.6311,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,170.7752,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,426.4063,0,13.22,3308.26, +80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,252.4238,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,175.0782,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,427.502,0,13.19,3299.78, 80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,256.3235,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,173.5099,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,429.8334,0,13.11,3281.88, +80,64,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,430.9915,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,430.9915,1,14.71,3285.84, +80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,265.3977,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,166.4318,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,431.8295,0,44.76,2807.02, +80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,271.8789,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,161.456,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,433.3349,0,44.6,2797.27, +80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,269.3657,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,164.1089,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,433.4746,0,44.59,2796.37, +80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,268.9473,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,167.0488,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,435.9961,0,44.33,2780.19, +80,2,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, +80,1,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, +80,16,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, +80,4,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, +80,8,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, +80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,253.0207,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,220.8733,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,473.894,0,11.9,2976.75, +80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,254.6539,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.1%,220.0918,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,474.7457,0,11.87,2971.41, +80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,252.7997,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,230.1231,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.7%,482.9228,0,11.67,2921.09, +80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,251.2934,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,233.6045,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,484.8979,0,11.63,2909.19, 80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,294.599,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,192.8643,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,487.4633,0,23.13,2896.71, -80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,307.9267,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,225.0382,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,532.9649,0,42.31,2654.57, -80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,388.2337,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,321.3807,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,709.6144,0,63.55,2001.5, -80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, -80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, -80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, -80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, -80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, -80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,245.7053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,130.3948,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.1001,0,7.49,7495.12, -80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,374.0904,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,216.3424,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,590.4328,0,9.55,4774.9, -80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,500.4382,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,283.1229,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.9%,783.5611,0,14.39,3598.89, -80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, -80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, -80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, -80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, -80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, -80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, -80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, -80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, -80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, -80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,124.4194,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,83.1785,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,207.5979,0,6.79,6790.19, -80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,204.8947,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,128.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,333.8305,0,8.44,4223.62, -80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,252.4238,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,175.0782,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,427.502,0,13.19,3299.78, +80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,285.3733,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,6.3%,202.3184,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,487.6917,0,23.12,2895.35, 80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,290.4107,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,199.2574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,489.6681,0,23.02,2883.66, -80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,313.3394,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,212.1627,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,525.5021,0,42.91,2692.27, -80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,395.4189,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,320.7407,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,716.1596,0,62.97,1983.21, -80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, -80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,569.0736,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,348.0744,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,917.148,0,24.59,3076.19, -80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,613.9345,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,362.7996,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,976.7341,0,46.17,2891.35, -80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,725.7726,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,508.3457,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1234.1183,0,73.08,2292.8, -80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, -80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,272.3833,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,410.5794,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,682.9627,0,3.54,884.93, -80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,275.6717,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,483.604,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,759.2757,0,6.36,796.5, -80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,282.1267,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,500.0393,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,782.166,0,12.36,774.2, -80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,296.2706,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,517.6107,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,813.8813,0,23.75,745.96, -80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,324.2019,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,548.1039,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,872.3058,0,44.31,699.61, -80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, -80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, -80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, -80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, -80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, -80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,244.2105,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,132.0083,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,376.2188,0,7.49,7492.76, -80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,366.6026,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,203.0742,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,569.6768,0,9.9,4948.88, -80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,517.7397,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,297.5602,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.9%,815.2999,0,13.83,3458.79, -80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,571.8527,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,328.5069,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,900.3596,0,25.04,3133.55, -80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,596.114,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,376.262,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,972.376,0,46.38,2904.31, -80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,703.0772,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,515.3501,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,1218.4273,0,74.03,2322.32, -80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, -80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,218.3233,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,305.0925,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,523.4158,0,2.31,1154.3, -80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,218.3233,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,305.0925,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,523.4158,0,2.31,1154.3, +80,128,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,285.6053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,3.7%,204.5325,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,490.1378,0,25.88,2892.13, +80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,300.9442,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,195.4701,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.5%,496.4143,0,22.71,2844.48, +80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,304.2414,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,199.2251,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,503.4665,0,22.39,2804.63, +80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,243.4825,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,262.4499,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,505.9324,0,11.14,2788.24, +80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,247.1864,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,262.0145,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,509.2009,0,11.07,2770.35, +80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,295.5849,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,214.8909,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,510.4758,0,44.17,2771.51, +80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, +80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, +80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, +80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, +80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, +80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, +80,32768,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, +80,4096,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, +80,16384,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, +80,8192,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, +80,1024,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, +80,2048,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, +80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, +80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, +80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, +80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, +80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, +80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, +80,32768,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, +80,8192,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, +80,2048,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, +80,1024,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, +80,16384,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, +80,4096,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, +80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,249.8568,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf2E,0.0%,269.7859,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,519.6427,0,10.85,2714.68, +80,32,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,520.7061,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,520.7061,1,54.13,2792.39, 80,4,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,218.3233,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,305.0925,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,523.4158,0,2.31,1154.3, -80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,218.3233,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,305.0925,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,523.4158,0,2.31,1154.3, 80,16,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,218.3233,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,305.0925,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,523.4158,0,2.31,1154.3, -80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,272.2826,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,435.0107,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,707.2933,0,3.42,854.49, -80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,275.5136,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,479.9084,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,755.422,0,6.4,800.57, -80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,280.3229,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,496.6503,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,776.9732,0,12.44,779.37, -80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,293.1722,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,514.6465,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,807.8187,0,23.93,751.56, -80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,316.2687,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,545.4219,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,861.6906,0,44.86,708.23, -80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, -80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, -80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, -80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, -80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, -80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.9899,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,81.8591,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,206.849,0,6.81,6814.78, -80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,205.3401,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,128.7497,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,334.0898,0,8.44,4220.35, -80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,255.6311,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,170.7752,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,426.4063,0,13.22,3308.26, -80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,304.2414,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,199.2251,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,503.4665,0,22.39,2804.63, +80,8,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,218.3233,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,305.0925,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,523.4158,0,2.31,1154.3, +80,2,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,218.3233,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,305.0925,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,523.4158,0,2.31,1154.3, +80,1,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,218.3233,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,305.0925,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,523.4158,0,2.31,1154.3, +80,256,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,302.4733,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,2.5%,221.0756,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,523.5489,0,48.45,2712.82, +80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,313.3394,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,212.1627,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,525.5021,0,42.91,2692.27, +80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, +80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, +80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, +80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, +80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,218.6275,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,307.0939,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,525.7214,0,2.3,1149.23, 80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,301.0373,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,225.1659,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,526.2032,0,42.85,2688.68, -80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,398.4468,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,319.45,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,717.8968,0,62.82,1978.41, -80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, -80,1,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, -80,2,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, -80,4,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, -80,8,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, -80,16,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,124.1564,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,83.5223,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,207.6787,0,6.79,6787.55, -80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,198.0641,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,130.4169,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,328.481,0,8.58,4292.41, -80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,254.9635,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,170.7009,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,425.6644,0,13.24,3314.02, -80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,300.9442,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,195.4701,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.5%,496.4143,0,22.71,2844.48, +80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, +80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, +80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, +80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, +80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, +80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, 80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,310.1395,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,218.1749,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,528.3144,0,42.68,2677.93, -80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,384.4754,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,319.2123,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,703.6877,0,64.09,2018.36, -80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, -80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, -80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, -80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, -80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, -80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,241.8455,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,132.6515,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,374.497,0,7.53,7527.21, -80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,375.1735,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,214.1587,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,589.3322,0,9.57,4783.82, -80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,497.6768,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,302.6195,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,800.2963,0,14.09,3523.63, -80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9263,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,346.2366,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,907.1629,0,24.86,3110.05, -80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,578.2908,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,375.1603,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,953.4511,0,47.3,2961.95, -80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,727.1819,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,509.1783,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1236.3602,0,72.95,2288.64, -80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, -80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,221.5506,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,326.2097,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,547.7603,0,2.21,1102.99, +80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,307.9267,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,225.0382,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,532.9649,0,42.31,2654.57, +80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, +80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, +80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, +80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, +80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, +80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, +80,2048,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, +80,16384,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, +80,4096,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, +80,32768,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, +80,1024,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, +80,8192,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, +80,1024,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, +80,32768,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, +80,4096,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, +80,16384,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, +80,8192,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, +80,2048,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, +80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,278.3578,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,264.9256,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,543.2834,0,20.75,2599.08, +80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,290.3441,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,254.7127,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,545.0568000000001,0,20.68,2590.63, +80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,290.9005,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,254.8242,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,545.7247,0,20.66,2587.46, +80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,221.5506,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,326.2097,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,547.7603,0,2.21,1102.99, 80,2,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,221.5506,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,326.2097,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,547.7603,0,2.21,1102.99, -80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,221.5506,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,326.2097,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,547.7603,0,2.21,1102.99, 80,8,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,221.5506,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,326.2097,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,547.7603,0,2.21,1102.99, -80,16,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,221.5506,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,326.2097,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,547.7603,0,2.21,1102.99, -80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,276.3545,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,408.058,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,684.4125,0,3.53,883.05, -80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,278.4781,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,520.2659,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,798.744,0,6.05,757.15, -80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,283.5484,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,497.074,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,780.6224,0,12.38,775.73, -80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,296.1626,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,515.0018,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,811.1644,0,23.83,748.46, -80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,319.2415,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,544.9096,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,864.1511,0,44.73,706.21, -80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, -80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, -80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, -80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, -80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, -80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,243.5773,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,134.825,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,378.4023,0,7.45,7449.52, -80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,377.9925,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,208.2557,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,586.2482,0,9.62,4808.99, -80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,508.1175,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,301.8424,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,809.9599,0,13.92,3481.59, -80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,567.2689,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,347.973,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,915.2419,0,24.64,3082.6, -80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,597.1745,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,373.7546,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,970.9291,0,46.45,2908.63, -80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,734.3088,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,509.0543,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1243.3631,0,72.54,2275.75, -80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, -80,56,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,228.7482,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.5%,0.0,Null,0.0%,228.7482,1,73.93,2644.88, -80,1,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, -80,2,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, -80,4,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, -80,8,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, -80,16,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3, +80,4,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,221.5506,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,326.2097,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,547.7603,0,2.21,1102.99, +80,1,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,221.5506,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,326.2097,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,547.7603,0,2.21,1102.99, 80,32,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,359.0112,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,190.8827,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.2%,549.8939,0,51.26,2644.17, -80,64,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,631.2833,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.9%,0.0,Null,0.0%,631.2833,1,89.3,2304.36, -80,128,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,772.5524,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.9%,0.0,Null,0.0%,772.5524,1,145.94,1884.76, -80,256,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,1166.708,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.9%,0.0,Null,0.0%,1166.708,1,193.27,1250.38, -80,512,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,2209.3824,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.8%,0.0,Null,0.0%,2209.3824,1,204.12,662.78, -80,1024,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, -80,1,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, -80,2,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, -80,4,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, -80,8,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, -80,16,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07, -80,32,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,520.7061,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,520.7061,1,54.13,2792.39, -80,64,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,622.6569,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.9%,0.0,Null,0.0%,622.6569,1,90.53,2336.28, -80,128,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,687.274,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.2%,0.0,Null,0.0%,687.274,1,164.04,2118.63, -80,256,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1021.9423,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,1021.9423,1,220.64,1427.51, -80,512,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1749.1923,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,1749.1923,1,257.82,837.15, -80,1024,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, -256,1024,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,86.9012,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,86.7313,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,173.6325,0,222.62,3575.12, -256,512,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,77.6488,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,61.2156,moe_ck2stages_gemm2_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,138.8644,0,139.18,4409.83, -256,256,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,73.7682,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,42.2564,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,116.0246,0,83.29,5241.77, -256,128,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,72.9667,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,41.6406,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,114.6073,0,42.16,5288.29, -256,64,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,71.4856,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,39.357,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,110.8426,0,21.8,5458.45, -256,32,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,62.7201,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,35.3504,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,98.0705,0,12.32,6163.97, -256,1,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,47.8381,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,28.9978,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,76.8359,0,7.86,7864.06, -256,2,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,47.8381,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,28.9978,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,76.8359,0,7.86,7864.06, -256,4,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,47.8381,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,28.9978,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,76.8359,0,7.86,7864.06, -256,8,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,47.8381,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,28.9978,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,76.8359,0,7.86,7864.06, -256,16,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,47.8381,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,28.9978,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,76.8359,0,7.86,7864.06, -256,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,45.285,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,9.0945,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,54.3795,0,1.62,25916.16, -256,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,46.5232,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,11.8082,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.3%,58.3314,0,3.02,24160.73, -256,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,48.2418,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,17.8498,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,66.0916,0,5.33,21324.53, -256,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,53.6435,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,25.7951,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,79.4386,0,8.87,17742.74, -256,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,71.1678,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,41.5098,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,112.6776,0,12.51,12510.3, -256,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,158.4834,_ZN5aiter49fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,158.4834,1,17.78,8896.67, -256,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,212.9873,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,212.9873,1,26.47,6623.22, -256,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,241.6039,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,241.6039,1,46.66,5844.44, -256,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,249.5786,_ZN5aiter49fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,249.5786,1,90.35,5668.72, -256,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,260.9691,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,260.9691,1,172.81,5442.39, -256,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,359.4797,_ZN5aiter49fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,359.4797,1,250.9,3981.61, -256,1,5120,1024,128,1,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,13.451,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,7.2298,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,20.6808,0,1.52,97350.26, -256,2,5120,1024,128,1,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,19.0667,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,9.7505,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,28.8172,0,2.18,69864.41, -256,4,5120,1024,128,1,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,19.5061,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,12.6837,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,32.1898,0,3.91,62545.51, -256,8,5120,1024,128,1,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,21.0549,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,16.7646,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,37.8195,0,6.65,53236.79, -256,16,5120,1024,128,1,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,31.6908,moe_ck2stages_gemm1_256x32x64x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,23.8915,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,55.5823,0,9.06,36225.77, -256,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.0568,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,231.4032,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,556.46,0,162.09,5117.95, -256,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,325.7872,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,223.4421,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.2293,0,164.22,5185.32, -256,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,168.3246,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,205.6813,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,374.0059,0,241.16,3826.96, -256,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,168.5532,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,205.3887,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,373.9419,0,241.2,3827.62, -256,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,366.6991,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,366.6991,1,245.96,3903.22, -256,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,163.9563,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf3E,0.0%,218.341,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,382.2973,0,235.93,3743.96, -256,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.3028,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,368.4383,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1017.7411,0,177.24,5567.73, -256,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,644.3248,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,363.2348,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1007.5596,0,179.04,5623.99, -256,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.7642,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_64x128_2tg_pf2E,0.0%,226.0569,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,554.8211,0,325.13,5119.83, -256,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.5261,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,225.2001,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,553.7262,0,325.77,5129.96, -256,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.8711,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,248.6884,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,582.5595000000001,0,309.65,4876.06, -256,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,333.2413,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,244.2778,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,577.5191,0,312.35,4918.61, -256,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,171.882,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,152.5554,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,324.4374,0,238.29,3774.96, -256,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.8822,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,228.9235,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,554.8057,0,162.57,5133.21, -256,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,326.6863,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,223.1283,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.8146,0,164.04,5179.81, -256,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,171.438,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,205.2641,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,376.7021,0,239.43,3799.57, -256,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,169.5988,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,205.5691,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,375.1679,0,240.41,3815.11, -256,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,367.9317,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0,367.9317,1,245.14,3890.14, -256,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,647.0892,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,363.288,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1010.3772,0,178.54,5608.31, -256,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.9474,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_64x256_pf2E,0.0%,225.6663,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,554.6137,0,325.25,5121.75, -256,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.6203,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x256_pf2E,0.0%,227.39,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,556.0102999999999,0,324.43,5108.88, -256,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.1281,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,249.1231,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,582.2512,0,309.81,4878.64, -256,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,332.9031,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,245.6419,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,578.545,0,311.8,4909.89, -256,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,169.6864,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,153.1648,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,322.8512,0,239.46,3793.5, -256,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.8959,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,144.4775,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.3734,0,244.36,3871.17, -256,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.393,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,125.505,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,217.898,0,354.8,2829.59, -256,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.6859,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,125.2007,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.8866,0,353.19,2816.81, -256,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,91.8397,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.1852,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,251.0249,0,307.98,2456.18, -256,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,92.5497,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,158.3452,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,250.8949,0,308.13,2457.45, -256,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,162.8098,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,221.1124,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,383.9222,0,234.93,3728.12, -256,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.2553,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.4126,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1020.6679,0,176.74,5551.76, -256,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.7102,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,144.3167,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.0269,0,244.63,3875.42, -256,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.6237,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,125.9531,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,218.5768,0,353.69,2820.81, -256,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.1182,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,125.1912,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.3094,0,354.13,2824.26, -256,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,92.8309,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.4309,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,252.2618,0,306.46,2444.14, -256,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,94.4833,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,162.4132,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,256.8965,0,300.94,2400.04, -80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, -80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, -80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, -80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, -80,2048,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, -80,2048,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, -80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, -80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, -80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, -80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, -80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, +80,4096,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,553.26,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,553.26,1,139.73,591.32, +80,2048,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,553.26,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,553.26,1,139.73,591.32, +80,8192,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,553.26,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,553.26,1,139.73,591.32, +80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,292.2127,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,265.0419,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,557.2546,0,20.23,2533.92, +80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,347.2111,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,210.4569,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,557.668,0,69.31,2181.13, +80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,352.9213,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,213.1932,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,566.1144999999999,0,68.28,2148.59, +80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,299.2146,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,268.1991,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,567.4137000000001,0,39.74,2493.4, +80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,347.7216,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,219.8661,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,567.5877,0,68.1,2143.01, +80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,300.1714,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,268.3368,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,568.5082,0,39.66,2488.6, +80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,366.6026,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,203.0742,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,569.6768,0,9.9,4948.88, +80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, +80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, +80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, 80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, -80,2048,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, -80,2048,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, -80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, -80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, -80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, -80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, -80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, -80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, -80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, -80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, -80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, -80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, -80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, -80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, -80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, -80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, -80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, -80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, -80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, -80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, -80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, -80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, -80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, -80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, -80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, -80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, -80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, -80,2048,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, -80,2048,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, -80,2048,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, -80,2048,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, -256,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,328.7665,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,403.3563,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,732.1228,0,123.2,1955.01, -256,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,553.5171,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,421.0368,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,974.5539,0,185.1,2914.76, -256,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,572.8802,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,420.9959,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,993.8761,0,181.5,2858.1, -256,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,128.833,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,473.6369,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,602.4699,0,128.32,1023.39, -256,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,329.0517,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,400.2276,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,729.2793,0,123.68,1962.63, -256,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,553.6587,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,425.3929,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,979.0516,0,184.25,2901.37, -256,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,575.7451,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,426.4277,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1002.1728,0,180.0,2834.43, -256,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,327.5497,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,402.6535,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,730.2032,0,123.52,1960.15, -256,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,133.8828,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,474.349,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,608.2318,0,127.11,1013.7, -80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, -80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, -80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, -80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, -80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, -80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, -80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, -80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, -80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, -80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, -80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, -80,2048,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, -80,2048,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, -256,2048,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,86.9012,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,86.7313,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,173.6325,0,222.62,3575.12, -256,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,359.4797,_ZN5aiter49fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,359.4797,1,250.9,3981.61, -256,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.0568,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,231.4032,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,556.46,0,162.09,5117.95, -256,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,325.7872,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,223.4421,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.2293,0,164.22,5185.32, -256,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,168.3246,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,205.6813,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,374.0059,0,241.16,3826.96, -256,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,168.5532,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,205.3887,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,373.9419,0,241.2,3827.62, -256,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,366.6991,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_ps_32x256E,0.0%,0.0,Null,0,366.6991,1,245.96,3903.22, -256,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,163.9563,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf3E,0.0%,218.341,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,382.2973,0,235.93,3743.96, -256,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.3028,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,368.4383,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1017.7411,0,177.24,5567.73, -256,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,644.3248,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,363.2348,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1007.5596,0,179.04,5623.99, -256,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.7642,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_64x128_2tg_pf2E,0.0%,226.0569,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,554.8211,0,325.13,5119.83, -256,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.5261,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,225.2001,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,553.7262,0,325.77,5129.96, -256,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.8711,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,248.6884,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,582.5595000000001,0,309.65,4876.06, -256,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,333.2413,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,244.2778,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,577.5191,0,312.35,4918.61, -256,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,171.882,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,152.5554,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,324.4374,0,238.29,3774.96, -256,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.8822,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,228.9235,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,554.8057,0,162.57,5133.21, -256,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,326.6863,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,223.1283,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.8146,0,164.04,5179.81, -256,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,171.438,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,205.2641,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,376.7021,0,239.43,3799.57, -256,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,169.5988,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,205.5691,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,375.1679,0,240.41,3815.11, -256,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,367.9317,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0,367.9317,1,245.14,3890.14, -256,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,647.0892,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,363.288,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1010.3772,0,178.54,5608.31, -256,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.9474,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_64x256_pf2E,0.0%,225.6663,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,554.6137,0,325.25,5121.75, -256,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.6203,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x256_pf2E,0.0%,227.39,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,556.0102999999999,0,324.43,5108.88, -256,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.1281,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,249.1231,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,582.2512,0,309.81,4878.64, -256,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,332.9031,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,245.6419,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,578.545,0,311.8,4909.89, -256,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,169.6864,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,153.1648,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,322.8512,0,239.46,3793.5, -256,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.8959,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,144.4775,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.3734,0,244.36,3871.17, -256,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.393,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,125.505,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,217.898,0,354.8,2829.59, -256,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.6859,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,125.2007,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.8866,0,353.19,2816.81, -256,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,91.8397,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.1852,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,251.0249,0,307.98,2456.18, -256,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,92.5497,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,158.3452,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,250.8949,0,308.13,2457.45, -256,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,162.8098,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,221.1124,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,383.9222,0,234.93,3728.12, -256,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.2553,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.4126,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1020.6679,0,176.74,5551.76, -256,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.7102,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,144.3167,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.0269,0,244.63,3875.42, -256,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.6237,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,125.9531,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,218.5768,0,353.69,2820.81, -256,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.1182,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,125.1912,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.3094,0,354.13,2824.26, -256,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,92.8309,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.4309,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,252.2618,0,306.46,2444.14, -256,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,94.4833,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,162.4132,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,256.8965,0,300.94,2400.04, -80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, -80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, -80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, -80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, -80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,723.0851,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,723.0851,1,124.74,1979.44, -80,16384,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, -80,16384,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, -80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, -80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, -80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, -80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, -80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, 80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, -80,16384,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, -80,16384,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, -80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, -80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, -80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, -80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, -80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, -80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, -80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, -80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, -80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, -80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, -80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, -80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, -80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, -80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, -80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, +80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, +80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,351.9236,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,220.5397,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,572.4633,0,67.52,2124.76, +80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, +80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, 80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, -80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, -80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, -80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, -80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, -80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, -80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, -80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, -80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, -80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, -80,16384,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, -80,16384,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, -80,16384,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, -80,16384,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, -256,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,328.7665,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,403.3563,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,732.1228,0,123.2,1955.01, -256,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,553.5171,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,421.0368,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,974.5539,0,185.1,2914.76, -256,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,572.8802,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,420.9959,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,993.8761,0,181.5,2858.1, -256,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,128.833,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,473.6369,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,602.4699,0,128.32,1023.39, -256,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,329.0517,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,400.2276,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,729.2793,0,123.68,1962.63, -256,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,553.6587,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,425.3929,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,979.0516,0,184.25,2901.37, -256,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,575.7451,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,426.4277,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1002.1728,0,180.0,2834.43, -256,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,327.5497,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,402.6535,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,730.2032,0,123.52,1960.15, -256,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,133.8828,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,474.349,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,608.2318,0,127.11,1013.7, -80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, -80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, -80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, -80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, -80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, -80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, -80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, -80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, -80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, -80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, -80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, -80,16384,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,827.4898,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,827.4898,1,122.62,1736.35, -80,16384,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, -80,16384,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, -256,16384,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,86.9012,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,86.7313,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,173.6325,0,222.62,3575.12, -256,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,359.4797,_ZN5aiter52fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,359.4797,1,250.9,3981.61, -80,1,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,39.59,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,13.014,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.2%,52.604,0,1.88,26895.53, -80,2,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,41.4577,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,19.353,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.2%,60.8107,0,3.26,23266.2, -80,4,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,48.5685,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,32.2906,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,80.8591,0,4.9,17498.06, -80,8,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,72.5315,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,52.4907,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,125.0222,0,6.34,11317.7, -80,16,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,122.8339,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,81.6731,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,204.507,0,7.75,6919.74, -80,32,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,201.5918,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,7.5%,129.3064,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,330.8982,0,9.58,4277.69, -80,64,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,430.9915,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,430.9915,1,14.71,3285.84, -80,128,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,285.6053,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,3.7%,204.5325,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,490.1378,0,25.88,2892.13, -80,256,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,302.4733,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,2.5%,221.0756,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,523.5489,0,48.45,2712.82, -80,1024,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,826.1396,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,826.1396,1,122.82,1739.19, -80,2048,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1278.9473,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1278.9473,1,158.68,1140.65, -80,1,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,39.3207,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,12.9446,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.2%,52.2653,0,1.69,26964.5, -80,2,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,40.8689,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,18.1489,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.2%,59.0178,0,2.98,23879.73, -80,4,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,46.2328,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.7%,28.8818,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,75.1146,0,4.69,18762.96, -80,8,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,64.2093,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,1.8%,45.801,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.4%,110.0103,0,6.41,12812.06, -80,16,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,116.5659,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,74.8227,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,191.3886,0,7.36,7365.28, -80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,192.7713,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,6.2%,126.9169,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,319.6882,0,8.82,4410.47, -80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,412.7488,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,412.7488,1,13.66,3417.73, -80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,285.3733,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,6.3%,202.3184,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,487.6917,0,23.12,2895.35, -80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,295.5849,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,214.8909,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.3%,510.4758,0,44.17,2771.51, +80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, +80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, +80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, +80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,301.0401,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,279.1768,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,580.2169,0,38.86,2438.38, +80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,300.6316,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,279.9782,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,580.6098,0,38.84,2436.73, +80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,280.8024,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,303.8392,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,584.6415999999999,0,19.28,2415.22, +80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, +80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, +80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, +80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, +80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, +80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, +80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,377.9925,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,208.2557,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,586.2482,0,9.62,4808.99, +80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, +80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, +80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, +80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, +80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, +80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, +80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,375.1735,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,214.1587,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,589.3322,0,9.57,4783.82, +80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,374.0904,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,216.3424,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,590.4328,0,9.55,4774.9, +80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,287.1619,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,303.9818,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,591.1437000000001,0,19.07,2388.66, +80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,304.03,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,287.1332,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,591.1632,0,76.29,2402.55, +80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,285.1047,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,312.3733,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,597.478,0,18.87,2363.33, +80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,370.0464,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,232.0003,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,602.0467,0,9.36,4682.79, +80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,316.0831,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,287.2443,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,603.3274,0,74.75,2354.11, +80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,369.4651,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,236.5581,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,606.0232,0,9.3,4652.07, +80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,368.433,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,237.6243,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,606.0572999999999,0,4.65,4652.18, +80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,312.6244,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,296.8907,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,609.5151,0,73.99,2330.21, +80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,376.8336,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,233.1548,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,609.9884,0,4.62,4622.2, +80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,369.2362,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,241.8314,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.9%,611.0676,0,9.23,4613.66, +80,32,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,367.326,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,243.7681,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,611.0941,0,4.61,4613.84, +80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,314.4244,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,297.6385,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,612.0629,0,73.68,2320.51, +80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,292.8938,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,319.6213,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,612.5151000000001,0,36.81,2309.81, +80,32,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,368.8396,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,244.9596,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,613.7992,0,4.59,4593.51, +80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,372.7495,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,246.5629,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.5%,619.3124,0,9.1,4552.24, +80,64,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,622.6569,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.9%,0.0,Null,0.0%,622.6569,1,90.53,2336.28, +80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,301.3686,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,321.4971,moe_ck2stages_gemm2_256x32x64x256_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,622.8657000000001,0,36.2,2271.42, +80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,363.9112,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,259.8372,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,623.7484,0,9.04,4519.87, +80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,367.8369,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.0%,260.1737,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,628.0106000000001,0,8.98,4489.19, +80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,363.3125,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,265.3606,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,628.6731,0,8.97,4484.46, +80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,366.9854,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,263.1156,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,630.101,0,8.95,4474.3, +80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,302.8986,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,328.0652,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,630.9638,0,35.74,2242.27, +80,64,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,631.2833,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.9%,0.0,Null,0.0%,631.2833,1,89.3,2304.36, +80,512,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.int8,torch.int8,QuantType.per_Tensor,1,0,64,0,386.1143,_ZN5aiter49fmoe_stage1_bf16_pertokenInt8_g1u1_64x128_2tg_pf3E,0.0%,250.0186,moe_ck2stages_gemm2_256x64x128x256_1x4_MulABScaleExpertWeight_v3_Nswizzle0_Quant1_MulRoutedWeight1_I8_I8_B16,2.1%,636.1329000000001,0,243.06,964.29, +80,512,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,64,0,373.4158,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_64x128_2tg_pf3E,0.0%,268.4886,moe_ck2stages_gemm2_256x64x128x256_1x4_MulABScaleExpertWeight_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.3%,641.9044,0,240.88,955.62, +80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,305.4473,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,349.9039,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,655.3512000000001,0,68.81,2167.23, +80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,310.6611,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,349.0712,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,659.7322999999999,0,68.36,2152.84, +80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,305.7627,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,359.7434,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,665.5061000000001,0,67.76,2134.16, +80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,272.3833,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,410.5794,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,682.9627,0,3.54,884.93, +80,32,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,276.3545,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,408.058,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,684.4125,0,3.53,883.05, +80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, +80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, +80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, +80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, +80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,438.1766,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,247.4425,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,685.6191,0,4.11,8222.65, +80,128,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,687.274,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.2%,0.0,Null,0.0%,687.274,1,164.04,2118.63, +80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, +80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, +80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, +80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, +80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,442.5097,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,249.4045,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,691.9142,0,4.07,8147.84, +80,8,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, +80,2,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, +80,4,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, +80,1,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, +80,16,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,438.8756,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,256.4253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,695.3009,0,4.05,8108.15, +80,16,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, +80,1,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, +80,8,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, +80,2,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, +80,4,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,440.6719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,255.5291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,696.201,0,4.05,8097.67, +80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,384.4754,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,319.2123,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,703.6877,0,64.09,2018.36, +80,32,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,272.2826,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,435.0107,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.9%,707.2933,0,3.42,854.49, +80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,388.2337,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,321.3807,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,709.6144,0,63.55,2001.5, +80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,395.4189,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,320.7407,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,716.1596,0,62.97,1983.21, +80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,398.4468,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,319.45,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,717.8968,0,62.82,1978.41, 80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,718.9196,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,718.9196,1,125.46,1990.91, -80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1195.0627,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1195.0627,1,150.94,1216.11, -80,1,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,14.7181,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,8.8578,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,23.5759,0,1.6,12809.78, -80,2,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,16.1962,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,10.9481,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,27.1443,0,2.78,11126.26, -80,4,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,22.6085,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,21.8513,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,44.4598,0,3.4,6793.53, -80,8,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,26.7393,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,30.591,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,57.3303,0,5.27,5269.26, -80,16,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,16,0,44.9474,moe_ck2stages_gemm1_256x16x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,44.429,moe_ck2stages_gemm2_64x16x64x64_1x1_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.1%,89.3764,0,6.76,3381.05, -80,32,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,130.6754,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,130.6754,1,9.24,2314.0, -80,40,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,132.1174,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,132.1174,1,11.43,2289.49, -80,64,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,135.0415,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,135.0415,1,17.89,2242.1, -80,128,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,136.8626,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,136.8626,1,35.3,2218.01, -80,256,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,144.1889,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,144.1889,1,67.02,2116.22, -80,512,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,211.5621,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,211.5621,1,91.36,1457.17, -80,1024,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,290.2352,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,290.2352,1,133.18,1083.85, -80,2048,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,553.26,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,553.26,1,139.73,591.32, -256,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,328.7665,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,403.3563,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,732.1228,0,123.2,1955.01, -256,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,328.7665,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,403.3563,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,732.1228,0,123.2,1955.01, -80,4096,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, -80,8192,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, -80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, -80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, -80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, -80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, -80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, +80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,723.0851,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,723.0851,1,124.74,1979.44, +80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,723.0851,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,723.0851,1,124.74,1979.44, +80,64,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,275.5136,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,479.9084,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,755.422,0,6.4,800.57, +80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,275.6717,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,483.604,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,759.2757,0,6.36,796.5, +80,128,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,772.5524,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.9%,0.0,Null,0.0%,772.5524,1,145.94,1884.76, +80,128,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,280.3229,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,496.6503,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,776.9732,0,12.44,779.37, +80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,283.5484,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,497.074,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,780.6224,0,12.38,775.73, +80,128,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,282.1267,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,500.0393,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,782.166,0,12.36,774.2, +80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,500.4382,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,283.1229,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.9%,783.5611,0,14.39,3598.89, +80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,484.4582,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,310.4899,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,794.9481,0,7.09,3547.91, +80,64,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,278.4781,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,520.2659,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,798.744,0,6.05,757.15, +80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,497.6768,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,302.6195,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,800.2963,0,14.09,3523.63, 80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, -80,4096,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, -80,8192,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, -80,4096,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, -80,8192,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, -256,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,133.8828,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,474.349,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,608.2318,0,127.11,1013.7, -256,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,133.8828,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,474.349,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,608.2318,0,127.11,1013.7, -80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, -80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, -80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, -80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, +80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, +80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, +80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, +80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, +80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, +80,64,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,485.1197,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,319.3392,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,804.4589000000001,0,7.01,3505.97, +80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,493.7026,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,313.5552,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,807.2578000000001,0,6.98,3493.81, +80,256,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,293.1722,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,514.6465,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,807.8187,0,23.93,751.56, +80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, +80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, +80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, +80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, +80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, +80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, +80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,508.1175,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,301.8424,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,809.9599,0,13.92,3481.59, +80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,296.1626,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,515.0018,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,811.1644,0,23.83,748.46, +80,256,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,296.2706,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,517.6107,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,813.8813,0,23.75,745.96, +80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,517.7397,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,297.5602,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.9%,815.2999,0,13.83,3458.79, +80,64,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,486.2218,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,330.374,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,816.5958,0,6.9,3453.86, +80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, +80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, +80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, +80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, +80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, +80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, +80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, +80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, +80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, +80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, 80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, 80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, -80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, -80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, -256,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,367.9317,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0,367.9317,1,245.14,3890.14, -256,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,367.9317,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0,367.9317,1,245.14,3890.14, -256,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.1281,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,249.1231,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,582.2512,0,309.81,4878.64, -256,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.1281,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,249.1231,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,582.2512,0,309.81,4878.64, -80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, +80,1024,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,826.1396,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,826.1396,1,122.82,1739.19, +80,16384,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,827.4898,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,827.4898,1,122.62,1736.35, +80,32768,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,827.4898,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,827.4898,1,122.62,1736.35, +80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,509.0501,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,326.8678,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,835.9178999999999,0,13.49,3373.48, +80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,502.1489,moe_ck2stages_gemm1_256x32x64x128_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,339.9999,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.4%,842.1488,0,13.39,3348.52, +80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,510.6484,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,335.3164,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,845.9648,0,13.33,3333.41, +80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,495.9422,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,361.1859,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,857.1281,0,13.15,3290.0, +80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,516.6341,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,344.0188,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,860.6529,0,13.1,3276.52, +80,512,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,316.2687,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,545.4219,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,861.6906,0,44.86,708.23, +80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,319.2415,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,544.9096,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,864.1511,0,44.73,706.21, +80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,507.5861,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,362.0567,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.4%,869.6428,0,12.96,3242.65, +80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,510.4385,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,361.3331,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,871.7716,0,12.93,3234.73, +80,512,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,324.2019,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,548.1039,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.5%,872.3058,0,44.31,699.61, +80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, +80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, 80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, -256,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,329.0517,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,400.2276,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,729.2793,0,123.68,1962.63, -256,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,329.0517,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,400.2276,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,729.2793,0,123.68,1962.63, -80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, +80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, +80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, +80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, +80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, +80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, +80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, +80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, +80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, +80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, +80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,504.1748,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,378.662,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.9%,882.8368,0,12.77,3194.19, +80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, +80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, +80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, +80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, +80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, +80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, +80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, +80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, +80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, +80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, +80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, +80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, +80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,571.8527,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,328.5069,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,900.3596,0,25.04,3133.55, +80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, +80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, +80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, +80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, +80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, +80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, +80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9263,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,346.2366,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,907.1629,0,24.86,3110.05, +80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, +80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, +80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, 80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, -256,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.393,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,125.505,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,217.898,0,354.8,2829.59, -256,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.393,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,125.505,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,217.898,0,354.8,2829.59, -256,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,168.3246,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,205.6813,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,374.0059,0,241.16,3826.96, -256,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,168.3246,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,205.6813,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,374.0059,0,241.16,3826.96, -256,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.7642,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_64x128_2tg_pf2E,0.0%,226.0569,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,554.8211,0,325.13,5119.83, -256,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.7642,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_64x128_2tg_pf2E,0.0%,226.0569,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,554.8211,0,325.13,5119.83, -80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, -80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, -80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, -80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, -80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, -80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, -80,4096,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, -80,8192,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, -256,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.0568,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,231.4032,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,556.46,0,162.09,5117.95, -256,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.0568,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,231.4032,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,556.46,0,162.09,5117.95, -256,4096,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,86.9012,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,86.7313,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,173.6325,0,222.62,3575.12, -256,8192,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,86.9012,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,86.7313,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,173.6325,0,222.62,3575.12, -80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, -80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, -256,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,92.8309,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.4309,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,252.2618,0,306.46,2444.14, -256,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,92.8309,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.4309,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,252.2618,0,306.46,2444.14, -80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, -80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, -80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, -80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, -80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, -80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, -80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, -80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, -256,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,332.9031,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,245.6419,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,578.545,0,311.8,4909.89, -256,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,332.9031,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,245.6419,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,578.545,0,311.8,4909.89, -80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, -80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, -256,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,553.5171,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,421.0368,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,974.5539,0,185.1,2914.76, -256,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,553.5171,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,421.0368,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,974.5539,0,185.1,2914.76, -256,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,359.4797,_ZN5aiter49fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,359.4797,1,250.9,3981.61, -256,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,359.4797,_ZN5aiter49fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,359.4797,1,250.9,3981.61, -256,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.6859,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,125.2007,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.8866,0,353.19,2816.81, -256,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.6859,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,125.2007,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.8866,0,353.19,2816.81, -256,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,168.5532,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,205.3887,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,373.9419,0,241.2,3827.62, -256,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,168.5532,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,205.3887,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,373.9419,0,241.2,3827.62, -256,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.5261,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,225.2001,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,553.7262,0,325.77,5129.96, -256,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.5261,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,225.2001,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,553.7262,0,325.77,5129.96, -256,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,169.5988,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,205.5691,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,375.1679,0,240.41,3815.11, -256,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,169.5988,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,205.5691,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,375.1679,0,240.41,3815.11, -80,4096,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, -80,8192,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, -256,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,91.8397,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.1852,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,251.0249,0,307.98,2456.18, -256,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,91.8397,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.1852,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,251.0249,0,307.98,2456.18, -256,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.8711,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,248.6884,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,582.5595000000001,0,309.65,4876.06, -256,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.8711,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,248.6884,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,582.5595000000001,0,309.65,4876.06, -256,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.2553,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.4126,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1020.6679,0,176.74,5551.76, -256,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.2553,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.4126,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1020.6679,0,176.74,5551.76, -80,4096,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,553.26,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,553.26,1,139.73,591.32, -80,8192,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,553.26,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x192E,0.0%,0.0,Null,0.0%,553.26,1,139.73,591.32, -256,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,325.7872,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,223.4421,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.2293,0,164.22,5185.32, -256,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,325.7872,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,223.4421,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.2293,0,164.22,5185.32, +80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, +80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, +80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,567.2689,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,347.973,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.8%,915.2419,0,24.64,3082.6, +80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,569.0736,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,348.0744,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,917.148,0,24.59,3076.19, +80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,560.7307,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,356.8918,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,917.6225,0,12.29,3075.6, +80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, +80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, 80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, +80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, +80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, 80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, -256,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.6237,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,125.9531,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,218.5768,0,353.69,2820.81, -256,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.6237,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,125.9531,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,218.5768,0,353.69,2820.81, -256,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,94.4833,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,162.4132,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,256.8965,0,300.94,2400.04, -256,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,94.4833,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,162.4132,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,256.8965,0,300.94,2400.04, -256,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,171.882,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,152.5554,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,324.4374,0,238.29,3774.96, -256,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,171.882,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,152.5554,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,324.4374,0,238.29,3774.96, -256,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.8822,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,228.9235,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,554.8057,0,162.57,5133.21, -256,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.8822,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,228.9235,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,554.8057,0,162.57,5133.21, -80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, +80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,568.3004,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,363.255,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,931.5554,0,12.1,3029.6, +80,128,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,569.5169,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.7784,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,942.2953,0,11.96,2995.07, +80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,579.7023,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.1%,364.33,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,944.0323,0,23.89,2988.59, +80,128,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,560.9087,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,389.4697,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,950.3784,0,11.86,2969.6, +80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,578.2908,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,375.1603,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,953.4511,0,47.3,2961.95, +80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,583.1963,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf3E,0.1%,372.1123,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,955.3086,0,23.6,2953.31, +80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,591.6172,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,377.301,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,968.9182,0,23.27,2916.56, +80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,597.1745,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,373.7546,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,970.9291,0,46.45,2908.63, +80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,596.114,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,376.262,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,972.376,0,46.38,2904.31, +80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,593.7672,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,382.7183,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,976.4855,0,23.09,2893.96, +80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,613.9345,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,362.7996,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,976.7341,0,46.17,2891.35, +80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,586.3089,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,394.6809,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,980.9898,0,22.99,2876.0, +80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,571.3622,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,410.8419,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,982.2041,0,22.96,2872.44, +80,256,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,587.3979,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,399.9114,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,987.3093,0,22.84,2862.24, +80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,570.6719,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,417.051,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,987.7229,0,22.83,2856.39, +80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,570.8776,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,416.9556,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,987.8332,0,22.83,2856.07, +80,256,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,588.4224,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,400.0835,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,988.5059,0,22.81,2858.77, +80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,591.8466,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,398.0473,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,989.8939,0,45.56,2852.91, +80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,582.0633,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,408.0174,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,990.0807,0,22.77,2849.59, +80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,571.3867,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,421.5456,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,992.9323,0,22.71,2841.41, +80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,601.7144,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,402.2826,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1003.997,0,44.92,2812.83, +80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,610.1172,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,408.2756,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1018.3928,0,44.28,2773.07, +80,256,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1021.9423,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,1021.9423,1,220.64,1427.51, +80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,591.7744,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,430.4532,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1022.2276,0,44.12,2762.67, +80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,621.3374,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,402.5935,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1023.9309,0,44.04,2767.03, +80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,595.5339,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,429.8353,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1025.3692,0,43.98,2754.21, +80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,617.3315,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,411.0096,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1028.3411,0,43.85,2755.17, +80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,597.6775,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,432.5419,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1030.2194,0,43.77,2741.24, +80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,600.7597,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,432.403,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1033.1627,0,43.65,2733.43, +80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,615.4904,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,418.179,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1033.6694,0,43.63,2732.09, +80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,624.0096,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf2E,0.0%,411.5878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1035.5974,0,87.09,2732.32, +80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,625.392,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,411.3256,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1036.7176,0,87.0,2729.37, +80,512,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,617.8863,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,423.4145,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1041.3008,0,43.31,2720.88, +80,512,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,619.5779,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,428.3394,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1047.9173,0,43.04,2703.7, +80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,625.1681,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf2E,0.0%,436.7729,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1061.941,0,84.93,2664.54, +80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,626.1058,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf2E,0.0%,442.0281,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1068.1339,0,84.44,2649.09, +80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,619.0301,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf2E,0.0%,468.3093,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1087.3393999999998,0,82.95,2602.3, +80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,620.397,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,467.8512,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1088.2482,0,82.88,2600.13, +80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,618.0944,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.1%,476.0445,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1094.1389,0,82.43,2586.13, +80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,617.4891,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,481.3596,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1098.8487,0,82.08,2575.04, +80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,712.3917,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,389.8042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1102.1959,0,5.11,5115.3, +80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,708.8776,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,395.3054,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1104.183,0,5.11,5106.09, +80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, 80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, +80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, +80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, +80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, +80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, +80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, +80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, +80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, +80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, +80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, +80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, +80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, +80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, +80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, +80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, +80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, +80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, +80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, +80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, +80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, +80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, +80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, +80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, +80,32,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,714.79,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,400.2169,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,1115.0069,0,5.06,5056.53, +80,32,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,714.05,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,401.6611,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.4%,1115.7111,0,5.05,5053.34, +80,256,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,1166.708,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.9%,0.0,Null,0.0%,1166.708,1,193.27,1250.38, +80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1195.0627,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1195.0627,1,150.94,1216.11, +80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1195.0627,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1195.0627,1,150.94,1216.11, +80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1195.0627,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1195.0627,1,150.94,1216.11, +80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,703.0772,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,515.3501,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,1218.4273,0,74.03,2322.32, +80,512,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,774.6328,moe_ck2stages_gemm1_256x64x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,459.0113,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCastExpertWeight_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.3%,1233.6441,0,125.34,989.38, +80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,725.7726,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,508.3457,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1234.1183,0,73.08,2292.8, +80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,727.1819,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,509.1783,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1236.3602,0,72.95,2288.64, +80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,734.3088,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,509.0543,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1243.3631,0,72.54,2275.75, +80,4096,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1278.9473,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1278.9473,1,158.68,1140.65, +80,8192,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1278.9473,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1278.9473,1,158.68,1140.65, +80,2048,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1278.9473,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1278.9473,1,158.68,1140.65, +80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, +80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, +80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, +80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, +80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, +80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, +80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, +80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, +80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, +80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, +80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, +80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, +80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, +80,2048,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, +80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, +80,16384,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, +80,1024,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, +80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, +80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, +80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, +80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, +80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, +80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, +80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, 80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, +80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, 80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, -80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, -80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, -256,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,572.8802,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,420.9959,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,993.8761,0,181.5,2858.1, -256,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,572.8802,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,420.9959,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,993.8761,0,181.5,2858.1, -80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, +80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, +80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, +80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, +80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, +80,16384,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, +80,1024,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, 80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, -256,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,169.6864,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,153.1648,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,322.8512,0,239.46,3793.5, -256,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,169.6864,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,153.1648,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,322.8512,0,239.46,3793.5, -256,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.3028,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,368.4383,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1017.7411,0,177.24,5567.73, -256,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.3028,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,368.4383,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1017.7411,0,177.24,5567.73, -256,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,327.5497,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,402.6535,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,730.2032,0,123.52,1960.15, -256,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,327.5497,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,402.6535,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,730.2032,0,123.52,1960.15, -80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, +80,2048,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, +80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, +80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, +80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, +80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, +80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, +80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, +80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, +80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, +80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, +80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, +80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, +80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, +80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, +80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, +80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, +80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, +80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, +80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, +80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, +80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, +80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, +80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, +80,1024,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, +80,16384,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, +80,2048,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, +80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, +80,2048,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, 80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, -80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, -80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, -80,4096,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, -80,8192,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, -80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, -80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, -256,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,92.5497,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,158.3452,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,250.8949,0,308.13,2457.45, -256,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,92.5497,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,158.3452,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,250.8949,0,308.13,2457.45, -256,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,333.2413,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,244.2778,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,577.5191,0,312.35,4918.61, -256,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,333.2413,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,244.2778,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,577.5191,0,312.35,4918.61, -256,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,162.8098,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,221.1124,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,383.9222,0,234.93,3728.12, -256,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,162.8098,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,221.1124,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,383.9222,0,234.93,3728.12, -256,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,647.0892,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,363.288,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1010.3772,0,178.54,5608.31, -256,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,647.0892,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,363.288,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1010.3772,0,178.54,5608.31, -256,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.9474,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_64x256_pf2E,0.0%,225.6663,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,554.6137,0,325.25,5121.75, -256,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.9474,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_64x256_pf2E,0.0%,225.6663,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,554.6137,0,325.25,5121.75, -256,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,366.6991,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,366.6991,1,245.96,3903.22, -256,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,366.6991,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_32x256E,0.0%,0.0,Null,0,366.6991,1,245.96,3903.22, -80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, -80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, -80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, -80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, -80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, -80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, -256,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.7102,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,144.3167,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.0269,0,244.63,3875.42, -256,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.7102,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,144.3167,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.0269,0,244.63,3875.42, -80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, -80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, -80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, -80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, -80,4096,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, -80,8192,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, -256,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,326.6863,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,223.1283,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.8146,0,164.04,5179.81, -256,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,326.6863,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,223.1283,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.8146,0,164.04,5179.81, -80,4096,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1278.9473,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1278.9473,1,158.68,1140.65, -80,8192,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1278.9473,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1278.9473,1,158.68,1140.65, -256,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,575.7451,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,426.4277,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1002.1728,0,180.0,2834.43, -256,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,575.7451,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,426.4277,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1002.1728,0,180.0,2834.43, -80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, -80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, -256,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.8959,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,144.4775,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.3734,0,244.36,3871.17, -256,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.8959,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,144.4775,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.3734,0,244.36,3871.17, -80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, -80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, -256,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,644.3248,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,363.2348,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1007.5596,0,179.04,5623.99, -256,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,644.3248,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,363.2348,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1007.5596,0,179.04,5623.99, -80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1195.0627,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1195.0627,1,150.94,1216.11, -80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1195.0627,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,1195.0627,1,150.94,1216.11, -80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, -80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, -80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, -80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, -80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, -80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, -80,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, -80,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, -80,4096,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, -80,8192,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, -80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, -80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, -256,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.6203,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x256_pf2E,0.0%,227.39,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,556.0102999999999,0,324.43,5108.88, -256,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.6203,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x256_pf2E,0.0%,227.39,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,556.0102999999999,0,324.43,5108.88, -256,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,163.9563,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf3E,0.0%,218.341,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,382.2973,0,235.93,3743.96, -256,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,163.9563,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf3E,0.0%,218.341,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,382.2973,0,235.93,3743.96, +80,16384,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, +80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, +80,1024,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, 80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, +80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, +80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, 80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, -256,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,553.6587,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,425.3929,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,979.0516,0,184.25,2901.37, -256,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,553.6587,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,425.3929,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,979.0516,0,184.25,2901.37, -80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, -80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, -80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, -80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, -80,4096,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, -80,8192,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, -80,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, -80,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, -80,4096,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, -80,8192,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, -256,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,128.833,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,473.6369,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,602.4699,0,128.32,1023.39, -256,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,128.833,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,473.6369,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,602.4699,0,128.32,1023.39, -80,4096,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, -80,8192,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, -256,4096,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.1182,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,125.1912,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.3094,0,354.13,2824.26, -256,8192,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.1182,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,125.1912,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.3094,0,354.13,2824.26, +80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, +80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, 80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, +80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, +80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, 80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, -256,4096,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,171.438,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,205.2641,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,376.7021,0,239.43,3799.57, -256,8192,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,171.438,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,205.2641,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,376.7021,0,239.43,3799.57, -80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, -80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, -80,4096,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, -80,8192,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, -256,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,328.7665,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,403.3563,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,732.1228,0,123.2,1955.01, -80,32768,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, -80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,495.1237,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,858.9692,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1354.0929,0,57.09,455.33, -80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,556.9903,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,555.6941,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1112.6844,0,81.06,1286.35, -80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,390.4133,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,410.897,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,801.3103,0,112.56,1786.21, -80,32768,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,141.5184,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,122.9113,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,264.4297,0,73.09,1173.77, -80,32768,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,285.499,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,233.8056,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,519.3046,0,74.44,1195.36, -256,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,64,0,133.8828,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,474.349,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,608.2318,0,127.11,1013.7, -80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,783.2767,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,600.1759,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,1383.4526,0,65.2,2058.57, -80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,266.6047,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,248.2173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,514.822,0,150.17,1197.62, -80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,394.9761,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,427.6178,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,822.5939,0,109.65,1739.99, -80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,762.665,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,620.1682,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,1382.8332,0,130.45,2054.18, -256,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,367.9317,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0,367.9317,1,245.14,3890.14, -256,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.1281,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,249.1231,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,582.2512,0,309.81,4878.64, -80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.4171,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,354.8123,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.2293999999999,0,87.93,1392.97, -256,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,329.0517,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,400.2276,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,729.2793,0,123.68,1962.63, -80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,394.0843,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,517.6212,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,911.7055,0,98.93,1569.92, -256,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.393,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,125.505,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,217.898,0,354.8,2829.59, -256,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,168.3246,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,205.6813,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,374.0059,0,241.16,3826.96, -256,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.7642,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_64x128_2tg_pf2E,0.0%,226.0569,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,554.8211,0,325.13,5119.83, -80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, -80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.1056,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight0_silu_F8_F8_B16,0.0%,314.319,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,586.4246,0,131.83,1051.39, -80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, -80,32768,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,283.4477,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,254.2291,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,537.6768,0,71.89,1154.52, -256,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.0568,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,231.4032,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,556.46,0,162.09,5117.95, -256,32768,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,86.9012,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,86.7313,moe_ck2stages_gemm2_256x128x64x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,173.6325,0,222.62,3575.12, -80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,269.7206,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,263.4387,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.4%,533.1593,0,145.0,1156.43, -256,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,92.8309,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.4309,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,252.2618,0,306.46,2444.14, -80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,784.9765,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,567.3861,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1352.3626,0,66.69,2105.89, -80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,761.7135,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,621.4747,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,3.3%,1383.1882,0,130.42,2053.66, -80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, -80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,763.616,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,611.4583,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1375.0743,0,131.18,2065.77, -256,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,332.9031,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,245.6419,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,578.545,0,311.8,4909.89, -80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,271.6424,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x128_3tg_pf3E,0.0%,302.5119,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,574.1543,0,134.65,1073.86, -256,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,553.5171,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,421.0368,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.7%,974.5539,0,185.1,2914.76, -256,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,32,0,359.4797,_ZN5aiter52fmoe_bf16_blockscaleFp8_g1u1_novs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,359.4797,1,250.9,3981.61, -256,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.6859,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,125.2007,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.8866,0,353.19,2816.81, -256,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,168.5532,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,205.3887,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,373.9419,0,241.2,3827.62, -256,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.5261,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,225.2001,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,553.7262,0,325.77,5129.96, -256,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,169.5988,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,205.5691,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,375.1679,0,240.41,3815.11, -80,32768,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, -256,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,128,0,91.8397,_ZN5aiter45fmoe_stage1_bf16_pertokenFp8_g1u1_128x128_pf3E,0.0%,159.1852,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.4%,251.0249,0,307.98,2456.18, -256,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,333.8711,_ZN5aiter47fmoe_stage1_bf16_pertokenFp8_g1u1_32x64_4tg_pf2E,0.0%,248.6884,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,582.5595000000001,0,309.65,4876.06, -256,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.2553,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.4126,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1020.6679,0,176.74,5551.76, -256,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,325.7872,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,223.4421,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.2293,0,164.22,5185.32, -80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,390.1172,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x256_2tg_pf3E,0.0%,531.1275,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,1.6%,921.2447,0,97.9,1553.67, -256,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,128,0,92.6237,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,125.9531,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,218.5768,0,353.69,2820.81, -256,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,94.4833,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,162.4132,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,256.8965,0,300.94,2400.04, -256,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,171.882,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,152.5554,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,324.4374,0,238.29,3774.96, -256,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,325.8822,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,228.9235,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.3%,554.8057,0,162.57,5133.21, -80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,560.9966,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,549.7671,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.6%,1110.7637,0,81.2,1288.58, -80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,759.5362,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,596.1779,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,1355.7141,0,133.06,2095.27, -80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, -256,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,16,0,572.8802,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.1%,420.9959,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.8%,993.8761,0,181.5,2858.1, -80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,64,0,509.2109,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,865.0837,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.4%,1374.2946,0,56.25,448.64, -256,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,128,0,169.6864,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,153.1648,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,322.8512,0,239.46,3793.5, -256,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,64,0,649.3028,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,368.4383,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1017.7411,0,177.24,5567.73, -256,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,327.5497,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,402.6535,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,730.2032,0,123.52,1960.15, -80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,785.2244,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,600.8826,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,1.6%,1386.107,0,65.07,2054.63, -80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,557.7338,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,556.9381,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1114.6719,0,80.92,1284.06, -80,32768,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,140.8771,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,134.5253,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,275.4024,0,70.18,1127.0, -80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,398.3967,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,410.8972,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,809.2939,0,111.45,1768.59, -256,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,128,0,92.5497,_ZN5aiter54fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_128x128_pf3E,0.0%,158.3452,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,250.8949,0,308.13,2457.45, -256,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,333.2413,_ZN5aiter56fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x64_4tg_pf2E,0.0%,244.2778,moe_ck2stages_gemm2_256x32x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,577.5191,0,312.35,4918.61, -256,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,162.8098,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf2E,0.0%,221.1124,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,383.9222,0,234.93,3728.12, -256,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,647.0892,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,363.288,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1010.3772,0,178.54,5608.31, -256,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,328.9474,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_64x256_pf2E,0.0%,225.6663,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,2.8%,554.6137,0,325.25,5121.75, -256,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,0,32,0,366.6991,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_gelu_1tg_ps_32x256E,0.0%,0.0,Null,0,366.6991,1,245.96,3903.22, +80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, +80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, +80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, +80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, +80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, +80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, 80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, -80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.5384,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,372.5179,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.5%,896.0563,0,86.28,1366.81, -80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, -256,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.7102,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,144.3167,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.0269,0,244.63,3875.42, -80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,268.8237,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant2_MulRoutedWeight1_silu_F8_F8_B16,0.0%,301.9153,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,570.739,0,135.45,1080.29, -80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,264.4639,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,263.7878,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,0.5%,528.2517,0,146.35,1167.18, -80,32768,4096,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,284.4745,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,232.9485,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,517.423,0,74.71,1199.71, -256,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,326.6863,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,223.1283,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,549.8146,0,164.04,5179.81, -80,32768,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,827.4898,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,827.4898,1,122.62,1736.35, -256,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,575.7451,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.1%,426.4277,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1002.1728,0,180.0,2834.43, -80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,64,0,500.5853,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,854.6331,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1355.2184,0,57.05,454.95, -256,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,171.8959,moe_ck2stages_gemm1_256x128x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,144.4775,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,316.3734,0,244.36,3871.17, -80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,1,32,0,269.4957,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight1_silu_F8_F8_B16,0.0%,248.2335,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,517.7292,0,149.32,1190.9, -256,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,64,0,644.3248,moe_ck2stages_gemm1_256x64x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,363.2348,moe_ck2stages_gemm2_256x64x128x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1007.5596,0,179.04,5623.99, -80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,723.0851,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x256E,0.0%,0.0,Null,0.0%,723.0851,1,124.74,1979.44, +80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.417,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,717.4604,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,3.3%,1467.8774,0,122.89,1935.17, +80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, +80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, +80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, +80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, +80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, 80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,750.5786,_ZN5aiter44fmoe_stage1_bf16_pertokenFp8_g1u1_32x512_pf3E,0.0%,718.1941,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,2.8%,1468.7727,0,122.82,1933.99, -80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,781.9082,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,569.3894,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1351.2976,0,66.75,2107.55, +80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,985.8866,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,559.8234,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.5%,1545.71,0,7.29,3648.15, +80,64,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,979.7129,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,568.5654,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1548.2783,0,7.28,3642.1, +80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,998.4164,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,561.412,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.9%,1559.8284,0,7.23,3615.13, +80,64,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1025.0488,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,559.4474,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1584.4962,0,7.12,3558.85, +80,512,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1749.1923,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,1749.1923,1,257.82,837.15, +80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1118.5366,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,643.0011,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1761.5377,0,12.8,3202.21, +80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1135.1695,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,627.8855,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1763.0549999999998,0,12.79,3199.45, +80,128,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1128.3663,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,636.7004,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1765.0667,0,12.77,3195.81, +80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,128,0,27.2341,moe_ck2stages_gemm1_256x128x128x128_1x4_TypeCastExpertWeight_v3_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,1749.6879,moe_ck2stages_gemm2_256x128x128x64_1x4_TypeCast_v3_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1776.9219999999998,0,50.76,3180.68, +80,128,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1134.7112,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,643.4691,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1778.1803,0,12.68,3172.24, +80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1149.8246,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,670.1762,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1820.0008,0,24.78,3101.36, +80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1171.5851,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,662.7687,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1834.3538,0,24.58,3077.1, +80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1174.4719,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,676.1183,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1850.5902,0,24.37,3050.1, +80,256,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1186.9346,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,673.7193,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1860.6539,0,24.24,3033.6, +80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, +80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, +80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, +80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, +80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, +80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1027.6111,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,841.9358,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1869.5469,0,96.49,1519.4, +80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, +80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, +80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, +80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, +80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, +80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,1032.1656,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,839.6044,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,1871.77,0,96.37,1517.6, +80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, +80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, +80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, +80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, +80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, +80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08, +80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, +80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, +80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, +80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, +80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, +80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1059.0782,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,841.9574,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1901.0356,0,94.89,1494.23, +80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1216.8417,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,726.8006,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,1943.6423,0,46.4,2907.85, +80,512,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1257.2784,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,722.716,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,1979.9944,0,45.55,2854.46, +80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1256.2287,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,729.0988,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,1985.3275,0,45.43,2846.8, +80,512,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,2209.3824,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.8%,0.0,Null,0.0%,2209.3824,1,204.12,662.78, +80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, +80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, +80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, +80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, +80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, +80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, +80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, +80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, +80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, +80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, +80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, +80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1551.7224,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,998.809,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2550.5314,0,70.73,2221.7, +80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, 80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, -80,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,558.3594,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_silu_F8_F8_B16,0.0%,555.8992,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1114.2586,0,80.95,1284.54, -80,32768,2048,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,144.9248,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,123.3403,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,268.2651,0,72.05,1156.98, +80,4096,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, +80,2048,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, +80,16384,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, +80,8192,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4364,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,1026.4073,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,3.3%,2571.8437000000004,0,70.14,2203.29, +80,4096,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, +80,16384,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, 80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, -256,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,64,0,328.6203,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x256_pf2E,0.0%,227.39,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,556.0102999999999,0,324.43,5108.88, -256,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,64,0,163.9563,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_64x128_2tg_pf3E,0.0%,218.341,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,382.2973,0,235.93,3743.96, -80,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,749.1329,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.2144,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1444.3473,0,124.89,1966.7, -256,32768,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,1,16,0,553.6587,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.1%,425.3929,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,979.0516,0,184.25,2901.37, -80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,524.8862,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_gelu_B16_B16_B16,0.0%,354.9441,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,879.8303000000001,0,87.87,1392.01, -80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Tensor,1,0,32,0,388.4815,moe_ck2stages_gemm1_256x32x64x256_1x4_MulABScale_v1_Nswizzle0_Quant1_MulRoutedWeight0_gelu_F8_F8_B16,0.0%,429.5016,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.6%,817.9830999999999,0,110.26,1749.8, -80,32768,2048,192,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,143.5168,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,134.0455,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.0%,277.5623,0,69.63,1118.23, -80,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,272.9741,_ZN5aiter48fmoe_stage1_bf16_pertokenFp8_g1u1_32x128_3tg_pf3E,0.0%,313.1772,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight1_F8_F8_B16,0.5%,586.1513,0,131.89,1051.88, -80,32768,4096,192,128,8,ActivationType.Gelu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,286.2896,moe_ck2stages_gemm1_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_gelu_B16_B16_B16,0.0%,254.7276,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.1%,541.0172,0,71.45,1147.39, -256,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_1x128,1,0,64,0,128.833,moe_ck2stages_gemm1_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight0_gelu_F8_F8_B16,0.1%,473.6369,moe_ck2stages_gemm2_256x64x128x128_1x4_MulABScaleExpertWeightA8W8blkscale_v3_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,15.6%,602.4699,0,128.32,1023.39, -80,32768,7168,256,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,385.5365,_ZN5aiter57fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x256_2tg_pf3E,0.0%,515.5173,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,901.0538,0,100.1,1588.48, -256,32768,4096,384,128,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,1,128,0,93.1182,moe_ck2stages_gemm1_256x128x64x128_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,125.1912,moe_ck2stages_gemm2_256x128x128x128_1x4_TypeCast_v3_Nswizzle0_Quant1_MulRoutedWeight0_F8_F8_B16,0.0%,218.3094,0,354.13,2824.26, -80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,1,32,0,752.5409,_ZN5aiter53fmoe_stage1_bf16_pertokenFp8_doweight_g1u1_32x512_pf3E,0.0%,695.4453,moe_ck2stages_gemm2_256x32x64x128_1x4_MulABScaleExpertWeight_v1_Nswizzle0_Quant2_MulRoutedWeight0_F8_F8_B16,0.0%,1447.9861999999998,0,124.58,1961.75, -256,32768,7168,256,256,8,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Tensor,1,0,64,0,171.438,moe_ck2stages_gemm1_256x64x64x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight0_silu_F8_F8_B16,0.0%,205.2641,moe_ck2stages_gemm2_256x64x128x256_1x4_TypeCast_v1_Nswizzle0_Quant1_MulRoutedWeight1_F8_F8_B16,1.3%,376.7021,0,239.43,3799.57, -80,32768,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,1,32,0,1538.2556,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_silu_B16_B16_B16,0.0%,1005.4051,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_B16_B16_B16,0.0%,2543.6607,0,70.92,2227.7, -80,32768,4096,384,128,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,523.4769,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,371.6201,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,0.4%,895.097,0,86.37,1368.27, - +80,1024,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, +80,8192,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, +80,2048,7168,512,256,8,ActivationType.Silu,torch.bfloat16,torch.bfloat16,torch.bfloat16,QuantType.No,1,0,32,0,1545.4564,moe_ck2stages_gemm1_256x32x64x128_1x4_TypeCast_v1_Nswizzle0_Quant0_MulRoutedWeight0_silu_B16_B16_B16,0.0%,1031.1042,moe_ck2stages_gemm2_256x32x64x64_1x4_TypeCastExpertWeight_v1_Nswizzle0_Quant0_MulRoutedWeight1_B16_B16_B16,2.8%,2576.5606,0,70.01,2199.25, +80,2048,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, +80,32768,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, +80,16384,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter48fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_ps_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, +80,4096,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, +80,8192,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, +80,1024,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26, +80,32768,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, +80,16384,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, +80,4096,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, +80,2048,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, +80,8192,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78, +80,1024,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78,