diff --git a/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_ds_v3.csv b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_ds_v3.csv index 4d3d45ab3a..5b18451ffc 100644 --- a/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_ds_v3.csv +++ b/aiter/configs/model_configs/a8w8_blockscale_tuned_gemm_ds_v3.csv @@ -66,3 +66,580 @@ cu_num,M,N,K,kernelId,splitK,us,kernelName,tflops,bw,errRatio 256,16384,7168,2048,0,0,330.8182,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1454.08,855.8,0.0 256,20480,7168,2048,0,0,630.5854,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,953.55,555.4,0.0 256,32768,7168,2048,0,0,649.7976,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,1480.57,848.8,0.0 +80,1,2112,7168,8,0,29.6812,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,24576,1536,8,0,21.7723,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,32768,512,8,0,11.9511,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,7168,16384,8,0,80.4411,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,36864,7168,8,0,126.2421,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,7168,18432,8,0,90.1639,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,128,7168,13,0,31.0068,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,1,8192,1536,8,0,10.8235,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,2240,7168,8,0,29.5828,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,32768,1536,8,0,28.4212,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,3072,1536,8,0,8.805,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,4096,512,8,0,4.2994,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,7168,2048,8,0,13.3935,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,4608,7168,8,0,30.2356,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,7168,2304,8,0,13.4935,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,11264,1536,8,0,14.1259,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,4096,7168,8,0,30.2807,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,512,7168,8,0,28.6767,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1,7168,256,6,0,4.389,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,2112,7168,8,0,29.936,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,24576,1536,8,0,22.0947,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,32768,512,8,0,13.3839,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,7168,16384,8,0,81.0783,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,36864,7168,8,0,127.8185,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,7168,18432,8,0,87.2351,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,128,7168,8,0,22.9607,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,8192,1536,8,0,11.0715,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,2240,7168,8,0,29.5412,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,32768,1536,8,0,28.4788,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,3072,1536,8,0,8.5502,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,4096,512,8,0,4.319,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,7168,2048,8,0,13.5635,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,4608,7168,18,0,35.42,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,2,7168,2304,8,0,13.5891,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,11264,1536,8,0,14.8831,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,4096,7168,8,0,30.4452,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,512,7168,8,0,28.3127,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2,7168,256,6,0,4.3638,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,2112,7168,8,0,30.8,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,24576,1536,8,0,22.3543,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,32768,512,8,0,11.9811,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,7168,16384,8,0,81.1975,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,36864,7168,8,0,129.7421,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,7168,18432,8,0,93.2707,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,128,7168,8,0,23.1411,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,8192,1536,8,0,11.1999,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,2240,7168,8,0,30.7308,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,32768,1536,8,0,28.9084,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,3072,1536,8,0,8.4646,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,4096,512,8,0,4.3014,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,7168,2048,8,0,13.6655,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,4608,7168,8,0,31.122,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,7168,2304,7,0,12.4283,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0,0,0 +80,4,11264,1536,8,0,13.9487,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,4096,7168,8,0,31.1835,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,512,7168,8,0,29.6291,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,4,7168,256,6,0,4.3806,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,2112,7168,8,0,31.0184,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,24576,1536,8,0,22.9327,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,32768,512,8,0,12.1631,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,7168,16384,8,0,84.4723,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,36864,7168,8,0,131.4905,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,7168,18432,8,0,93.7435,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,128,7168,8,0,23.3771,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,8192,1536,8,0,11.3803,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,2240,7168,8,0,31.1992,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,32768,1536,6,0,41.504,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,3072,1536,8,0,9.7794,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,4096,512,8,0,4.2922,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,7168,2048,8,0,13.9275,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,4608,7168,8,0,31.6788,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,7168,2304,7,0,12.4415,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0,0,0 +80,8,11264,1536,8,0,14.3103,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,4096,7168,8,0,31.6459,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,512,7168,8,0,30.1107,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,8,7168,256,8,0,4.2986,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,64,7168,8,0,22.9407,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,128,7168,8,0,22.9747,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,2112,7168,8,0,23.8827,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,7168,16384,7,0,81.0135,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0,0,0 +80,16,8192,1536,8,0,10.1315,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,32768,512,8,0,12.3079,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,24576,1536,8,0,20.9171,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,36864,7168,8,0,126.5277,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,7168,18432,7,0,83.8083,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0,0,0 +80,16,2240,7168,8,0,24.0495,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,32768,1536,8,0,27.3928,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,11264,1536,8,0,13.7603,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,16,4096,7168,8,0,29.3875,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,32,64,7168,8,0,22.2799,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,32,128,7168,8,0,22.8355,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,32,2112,7168,8,0,23.8695,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,32,7168,16384,12,0,90.6679,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,32,8192,1536,12,0,12.3951,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,32,32768,512,13,0,18.3123,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,32,24576,1536,13,0,32.0868,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,32,36864,7168,13,0,181.66,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,32,7168,18432,12,0,91.0487,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,32,2240,7168,8,0,24.0079,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,32,32768,1536,13,0,42.9012,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,32,11264,1536,8,0,19.3971,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,32,4096,7168,7,0,32.7,a8w8_blockscale_1x128x128_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8_1x2_intrawave_v1,0,0,0 +80,64,64,7168,8,0,22.0451,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,64,128,7168,8,0,22.2911,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,64,2112,7168,8,0,30.6896,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,64,7168,16384,18,0,142.6302,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,64,8192,1536,18,0,16.5967,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,64,32768,512,18,0,24.9151,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,64,24576,1536,18,0,37.2172,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,64,36864,7168,18,0,204.0713,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,64,7168,18432,18,0,130.9413,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,64,2240,7168,8,0,30.7432,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,64,32768,1536,18,0,50.8693,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,64,11264,1536,18,0,23.2527,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,64,4096,7168,18,0,33.6092,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,2112,7168,18,0,36.1692,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,24576,1536,2,0,71.0526,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,96,32768,512,10,0,39.2468,a8w8_blockscale_1x128x128_256x32x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,7168,16384,12,0,226.0095,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,36864,7168,12,0,375.3626,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,7168,18432,18,0,211.557,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,128,7168,8,0,19.2111,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,96,8192,1536,2,0,29.3651,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,96,2240,7168,18,0,36.3512,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,32768,1536,3,0,90.8503,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,96,3072,1536,12,0,12.2499,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,4096,512,18,0,9.8511,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,7168,2048,18,0,30.9572,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,4608,7168,18,0,54.5221,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,7168,2304,3,0,31.2932,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,96,11264,1536,18,0,38.5268,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,4096,7168,10,0,70.1189,a8w8_blockscale_1x128x128_256x32x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,96,512,7168,8,0,22.4563,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,96,7168,256,6,0,9.3194,a8w8_blockscale_1x128x128_256x16x64x128_8x16_16x16_16x16x1_8x32x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,128,64,7168,8,0,21.7027,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,128,128,7168,8,0,21.7803,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,128,2112,7168,18,0,32.8696,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,128,7168,16384,18,0,187.9301,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,128,8192,1536,18,0,29.2096,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,128,32768,512,16,0,41.9332,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,128,24576,1536,0,0,71.0226,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,128,36864,7168,0,0,355.7861,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,128,7168,18432,18,0,189.0192,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,128,2240,7168,18,0,32.9736,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,128,32768,1536,0,0,90.0163,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,128,11264,1536,18,0,36.5868,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,128,4096,7168,18,0,51.234,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,160,2112,7168,18,0,53.5813,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,160,24576,1536,3,0,100.982,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,160,32768,512,3,0,59.4173,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,160,7168,16384,18,0,293.8983,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,160,36864,7168,18,0,559.152,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,160,7168,18432,2,0,379.9103,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,160,128,7168,8,0,19.6555,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,160,8192,1536,3,0,38.2228,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,160,2240,7168,18,0,54.0881,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,160,32768,1536,2,0,130.1209,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,160,3072,1536,18,0,16.9339,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,160,4096,512,11,0,11.1599,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,160,7168,2048,18,0,47.0101,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,160,4608,7168,3,0,81.3331,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,160,7168,2304,10,0,45.9093,a8w8_blockscale_1x128x128_256x32x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,160,11264,1536,3,0,50.0469,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,160,4096,7168,12,0,71.1633,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,160,512,7168,8,0,28.3099,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,160,7168,256,11,0,10.8494,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,192,2112,7168,18,0,49.1485,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,24576,1536,18,0,102.1488,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,32768,512,16,0,58.0269,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,7168,16384,12,0,381.2924,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,36864,7168,18,0,513.2381,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,7168,18432,18,0,293.6094,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,128,7168,8,0,19.7799,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,192,8192,1536,16,0,38.6744,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,2240,7168,18,0,49.4897,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,32768,1536,2,0,132.3045,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,192,3072,1536,18,0,16.5627,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,4096,512,18,0,12.7227,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,7168,2048,18,0,44.5893,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,4608,7168,18,0,78.2074,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,7168,2304,3,0,42.1737,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,192,11264,1536,18,0,49.4729,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,4096,7168,18,0,78.657,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,192,512,7168,8,0,26.9995,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,192,7168,256,16,0,12.7423,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,224,2112,7168,18,0,54.1121,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,224,24576,1536,0,0,128.7353,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,32768,512,16,0,74.8462,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,224,7168,16384,18,0,349.3654,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,224,36864,7168,2,0,725.3164,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,7168,18432,18,0,381.1539,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,224,128,7168,8,0,20.1755,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,224,8192,1536,3,0,48.1293,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,2240,7168,18,0,54.4329,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,224,32768,1536,2,0,167.1315,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,3072,1536,3,0,23.8183,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,4096,512,10,0,14.9871,a8w8_blockscale_1x128x128_256x32x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,224,7168,2048,2,0,55.3617,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,4608,7168,2,0,103.4296,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,7168,2304,3,0,53.5745,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,11264,1536,3,0,62.5822,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,4096,7168,2,0,100.7899,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,224,512,7168,8,0,28.0503,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,224,7168,256,11,0,13.9371,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,256,64,7168,8,0,20.9311,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,256,128,7168,8,0,21.0991,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,256,2112,7168,18,0,49.4557,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,256,7168,16384,18,0,343.3197,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,256,8192,1536,0,0,45.4237,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,256,32768,512,16,0,74.2042,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,256,24576,1536,0,0,108.7836,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,256,36864,7168,0,0,606.9262,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,256,7168,18432,18,0,342.1461,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,256,2240,7168,18,0,49.7517,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,256,32768,1536,0,0,151.9194,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,256,11264,1536,18,0,62.9586,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,256,4096,7168,0,0,107.5719,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,288,2112,7168,13,0,81.8363,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,288,24576,1536,3,0,158.2255,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,288,32768,512,16,0,91.3867,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,288,7168,16384,2,0,521.2036,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,288,36864,7168,2,0,976.0007,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,288,7168,18432,18,0,450.2698,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,288,128,7168,8,0,20.9287,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,288,8192,1536,2,0,56.7309,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,288,2240,7168,13,0,82.4603,a8w8_blockscale_1x128x128_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,288,32768,1536,3,0,209.4934,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,288,3072,1536,3,0,25.3575,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,288,4096,512,16,0,14.6299,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,288,7168,2048,18,0,65.1418,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,288,4608,7168,18,0,134.2474,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,288,7168,2304,3,0,63.8426,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,288,11264,1536,3,0,76.3027,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,288,4096,7168,18,0,106.7871,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,288,512,7168,8,0,28.7647,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,288,7168,256,11,0,16.8451,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,320,2112,7168,18,0,79.0835,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,24576,1536,18,0,163.7415,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,32768,512,16,0,91.3327,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,7168,16384,18,0,440.8611,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,36864,7168,18,0,865.4396,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,7168,18432,2,0,552.8448,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,320,128,7168,8,0,21.6103,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,320,8192,1536,18,0,55.9177,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,2240,7168,18,0,77.7226,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,32768,1536,2,0,220.1254,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,320,3072,1536,18,0,23.6307,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,4096,512,3,0,13.9711,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,320,7168,2048,18,0,63.8546,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,4608,7168,18,0,126.6017,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,7168,2304,18,0,63.521,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,11264,1536,2,0,81.5799,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,320,4096,7168,18,0,99.8999,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,320,512,7168,8,0,29.4567,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,320,7168,256,16,0,16.5307,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,2112,7168,18,0,86.7303,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,24576,1536,3,0,185.6181,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,352,32768,512,16,0,107.482,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,7168,16384,18,0,528.4344,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,36864,7168,18,0,1107.0329,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,7168,18432,18,0,564.1177,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,128,7168,8,0,21.4607,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,352,8192,1536,3,0,66.795,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,352,2240,7168,18,0,85.4167,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,32768,1536,0,0,240.9319,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,352,3072,1536,2,0,29.2036,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,352,4096,512,16,0,19.1615,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,7168,2048,18,0,81.6351,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,4608,7168,18,0,156.3831,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,7168,2304,3,0,77.3487,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,352,11264,1536,1,0,90.9328,a8w8_blockscale_1x128x128_256x128x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,352,4096,7168,18,0,135.5164,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,512,7168,18,0,36.2172,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,352,7168,256,11,0,19.9235,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,384,2112,7168,18,0,77.6818,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,384,24576,1536,0,0,160.4975,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,384,32768,512,16,0,106.8048,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,384,7168,16384,18,0,510.7671,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,384,36864,7168,0,0,823.1322,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,384,7168,18432,18,0,520.9614,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,384,128,7168,8,0,21.6103,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,384,8192,1536,0,0,66.5862,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,384,2240,7168,18,0,79.7882,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,384,32768,1536,0,0,200.5321,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,384,3072,1536,2,0,30.3712,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,384,4096,512,16,0,18.5131,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,384,7168,2048,18,0,76.1686,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,384,4608,7168,0,0,138.3706,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,384,7168,2304,3,0,79.7931,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,384,11264,1536,0,0,86.1339,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,384,4096,7168,2,0,157.4221,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,384,512,7168,18,0,32.4251,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,384,7168,256,16,0,19.8795,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,416,2112,7168,18,0,85.7363,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,416,24576,1536,2,0,214.7142,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,32768,512,16,0,124.5641,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,416,7168,16384,2,0,638.6827,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,36864,7168,18,0,1289.8574,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,416,7168,18432,18,0,625.0472,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,416,128,7168,8,0,21.8407,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,416,8192,1536,3,0,79.0346,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,2240,7168,2,0,100.4724,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,32768,1536,2,0,286.0878,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,3072,1536,3,0,36.8592,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,4096,512,16,0,21.6743,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,416,7168,2048,2,0,86.7631,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,4608,7168,2,0,195.7449,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,7168,2304,3,0,86.4847,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,11264,1536,16,0,110.7437,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,416,4096,7168,2,0,162.395,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,416,512,7168,12,0,36.4728,a8w8_blockscale_1x128x128_256x32x128x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,416,7168,256,11,0,22.6419,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,448,2112,7168,18,0,78.2418,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,24576,1536,2,0,215.2938,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,448,32768,512,16,0,122.9093,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,7168,16384,18,0,568.7454,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,36864,7168,2,0,1317.354,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,448,7168,18432,18,0,571.0165,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,128,7168,8,0,21.8071,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,448,8192,1536,2,0,77.745,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,448,2240,7168,18,0,94.1239,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,32768,1536,2,0,285.5158,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,448,3072,1536,18,0,36.0088,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,4096,512,16,0,21.0995,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,7168,2048,18,0,84.8563,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,4608,7168,18,0,167.8608,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,7168,2304,2,0,79.8603,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,448,11264,1536,2,0,103.8016,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,448,4096,7168,18,0,143.2833,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,512,7168,18,0,32.4547,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,448,7168,256,16,0,21.6627,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,480,2112,7168,2,0,101.052,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,24576,1536,3,0,236.2091,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,32768,512,3,0,140.615,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,7168,16384,2,0,685.2973,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,36864,7168,2,0,1375.2819,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,7168,18432,2,0,722.8485,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,128,7168,8,0,21.7087,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,480,8192,1536,3,0,83.5863,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,2240,7168,2,0,100.3492,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,32768,1536,0,0,310.0055,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,3072,1536,3,0,37.096,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,4096,512,16,0,22.2995,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,480,7168,2048,2,0,97.8496,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,4608,7168,2,0,197.4349,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,7168,2304,3,0,98.382,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,11264,1536,3,0,112.3881,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,4096,7168,2,0,193.8503,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,480,512,7168,18,0,36.4788,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,480,7168,256,11,0,25.0591,a8w8_blockscale_1x128x128_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8_2x1_intrawave_v1,0,0,0 +80,512,64,7168,8,0,21.4855,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,512,128,7168,8,0,21.8127,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,512,2112,7168,18,0,95.2955,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,512,7168,16384,0,0,616.2237,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,512,8192,1536,0,0,82.2627,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,512,32768,512,16,0,138.6678,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,512,24576,1536,0,0,199.0229,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,512,36864,7168,0,0,1092.6052,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,512,7168,18432,0,0,587.1778,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,512,2240,7168,18,0,96.0611,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,512,32768,1536,0,0,258.5844,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,512,11264,1536,0,0,99.7424,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,512,4096,7168,0,0,145.8489,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,1024,64,7168,8,0,21.7759,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1024,128,7168,8,0,29.8,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,1024,2112,7168,18,0,167.8663,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,1024,7168,16384,0,0,1140.981,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,1024,8192,1536,0,0,141.623,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,1024,32768,512,16,0,263.8865,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,1024,24576,1536,0,0,390.4668,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,1024,36864,7168,0,0,2089.0389,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,1024,7168,18432,0,0,1164.9336,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,1024,2240,7168,18,0,171.2028,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,1024,32768,1536,0,0,502.3442,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,1024,11264,1536,3,0,212.6951,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,1024,4096,7168,0,0,293.2092,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,2048,64,7168,8,0,31.3536,a8w8_blockscale_1x128x128_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4_1x1_intrawave_v1,0,0,0 +80,2048,128,7168,18,0,32.7228,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,2048,2112,7168,18,0,322.7936,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,2048,7168,16384,0,0,2211.9911,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,2048,8192,1536,0,0,252.5388,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,2048,32768,512,16,0,513.9487,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,2048,24576,1536,0,0,740.166,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,2048,36864,7168,0,0,4009.9839,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,2048,7168,18432,0,0,2123.1521,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,2048,2240,7168,18,0,330.7757,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,2048,32768,1536,0,0,982.4949,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,2048,11264,1536,0,0,347.4495,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,2048,4096,7168,0,0,506.1934,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,4096,64,7168,18,0,32.8716,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,4096,128,7168,18,0,54.2453,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,4096,2112,7168,18,0,621.3309,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,4096,7168,16384,0,0,4302.3785,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,4096,8192,1536,0,0,492.553,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,4096,32768,512,16,0,1016.0847,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,4096,24576,1536,0,0,1448.9153,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,4096,36864,7168,0,0,7895.1562,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,4096,7168,18432,0,0,4033.3849,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,4096,2240,7168,18,0,643.893,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,4096,32768,1536,0,0,1930.1166,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,4096,11264,1536,0,0,729.0026,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,4096,4096,7168,0,0,907.0889,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,2112,7168,18,0,913.7307,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,6144,24576,1536,3,0,2533.8727,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,32768,512,16,0,1515.4856,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,6144,7168,16384,0,0,6608.4842,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,36864,7168,0,0,12073.6156,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,7168,18432,0,0,6181.2122,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,128,7168,18,0,80.2986,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,6144,8192,1536,0,0,819.0652,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,2240,7168,18,0,963.2848,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,6144,32768,1536,0,0,2916.9733,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,3072,1536,16,0,330.8397,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,6144,4096,512,16,0,196.9929,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,6144,7168,2048,0,0,809.0744,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,4608,7168,0,0,1526.3692,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,7168,2304,0,0,851.0657,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,11264,1536,0,0,996.9846,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,4096,7168,0,0,1377.5484,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,512,7168,0,0,232.7081,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,6144,7168,256,16,0,229.0933,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,8192,64,7168,18,0,57.1329,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,8192,128,7168,18,0,101.6448,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,8192,2112,7168,18,0,1213.0298,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,8192,7168,16384,0,0,8622.1052,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,8192,8192,1536,0,0,974.0173,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,8192,32768,512,16,0,2016.3824,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,8192,24576,1536,0,0,2886.1359,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,8192,36864,7168,0,0,15680.1116,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,8192,7168,18432,0,0,8118.7627,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,8192,2240,7168,18,0,1280.2434,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,8192,32768,1536,3,0,4534.0043,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,8192,11264,1536,0,0,1327.773,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,8192,4096,7168,0,0,1793.3575,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,2112,7168,18,0,1515.2969,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,10240,24576,1536,0,0,3651.431,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,32768,512,16,0,2515.5457,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,10240,7168,16384,0,0,11023.5824,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,36864,7168,0,0,19995.2839,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,7168,18432,0,0,10175.6663,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,128,7168,18,0,105.886,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,10240,8192,1536,3,0,1430.6685,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,2240,7168,18,0,1636.782,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,10240,32768,1536,0,0,4837.2435,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,3072,1536,16,0,547.2561,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,10240,4096,512,16,0,322.5837,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,10240,7168,2048,0,0,1327.4494,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,4608,7168,0,0,2530.3997,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,7168,2304,0,0,1388.6326,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,11264,1536,0,0,1650.8577,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,4096,7168,0,0,2250.9998,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,512,7168,0,0,315.2897,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,10240,7168,256,16,0,368.4904,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,12288,2112,7168,18,0,1828.9839,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,12288,24576,1536,0,0,4305.7272,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,32768,512,16,0,3016.8385,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,12288,7168,16384,0,0,12947.739,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,36864,7168,0,0,23625.3537,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,7168,18432,0,0,12232.127,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,128,7168,18,0,130.3021,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,12288,8192,1536,0,0,1449.011,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,2240,7168,2,0,2046.9716,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,32768,1536,0,0,5785.9662,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,3072,1536,0,0,554.6374,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,4096,512,3,0,388.9976,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,7168,2048,0,0,1607.9242,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,4608,7168,0,0,2993.2517,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,7168,2304,0,0,1685.1739,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,11264,1536,3,0,2345.0071,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,4096,7168,0,0,2718.6056,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,512,7168,0,0,379.5964,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,12288,7168,256,16,0,442.8496,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,14336,2112,7168,18,0,2116.3028,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,14336,24576,1536,3,0,5951.0038,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,32768,512,3,0,3544.876,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,7168,16384,0,0,15187.1505,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,36864,7168,0,0,27944.1037,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,7168,18432,0,0,14287.2359,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,128,7168,18,0,154.4819,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,14336,8192,1536,3,0,1969.4415,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,2240,7168,18,0,2246.3365,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,14336,3072,1536,3,0,757.7926,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,4096,512,16,0,451.8624,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,14336,7168,2048,0,0,1856.2897,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,4608,7168,0,0,3576.6846,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,7168,2304,0,0,1951.3416,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,11264,1536,0,0,2380.0822,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,32768,1536,0,0,6695.5133,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,4096,7168,0,0,3229.5757,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,512,7168,0,0,448.1751,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,14336,7168,256,16,0,524.9808,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,16384,64,7168,18,0,109.3048,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,16384,128,7168,0,0,178.0376,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,16384,2112,7168,18,0,2419.3022,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,16384,7168,16384,0,0,17329.3114,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,16384,8192,1536,0,0,1921.1494,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,16384,32768,512,16,0,4019.012,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,16384,24576,1536,3,0,6732.7083,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,16384,36864,7168,0,0,31524.9476,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,16384,7168,18432,0,0,16294.5139,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,16384,2240,7168,18,0,2564.4367,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,16384,11264,1536,3,0,3104.6685,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,16384,32768,1536,3,0,8991.3448,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,16384,4096,7168,0,0,3558.282,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,64,7168,18,0,190.0733,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,32768,128,7168,18,0,326.7444,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,32768,2112,7168,18,0,4824.1677,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,32768,7168,16384,0,0,34852.366,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,8192,1536,0,0,3815.3061,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,32768,512,3,0,8065.9904,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,24576,1536,3,0,13388.3528,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,36864,7168,0,0,62743.4272,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,7168,18432,0,0,32659.3895,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,2240,7168,18,0,5242.0726,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,32768,3072,1536,0,0,1461.7662,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,4096,512,16,0,1024.1373,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,32768,4608,7168,2,0,10392.1318,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,7168,2304,0,0,4884.5134,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,11264,1536,0,0,5271.9454,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,7168,2048,0,0,4614.1608,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,32768,1536,0,0,15429.1559,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,576,7168,18,0,1350.8663,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,32768,1536,7168,0,0,2845.5978,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,4096,7168,0,0,7182.5926,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,512,7168,0,0,963.846,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,32768,7168,256,16,0,1261.8447,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,65536,64,7168,18,0,350.8382,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,65536,128,7168,0,0,608.956,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,2112,7168,18,0,9617.2811,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,65536,7168,16384,0,0,70478.0846,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,8192,1536,0,0,7621.6687,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,24576,1536,0,0,22949.6148,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,7168,18432,0,0,65026.138,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,2240,7168,2,0,10800.6316,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,3072,1536,3,0,3441.6001,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,4096,512,16,0,2019.1043,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,65536,4608,7168,0,0,15930.1957,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,7168,2304,3,0,10515.839,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,11264,1536,3,0,12267.214,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,7168,2048,0,0,8385.3458,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,1536,7168,0,0,5455.1913,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,36864,7168,0,0,127114.4032,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,576,7168,18,0,2678.1872,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,65536,4096,7168,0,0,14374.4254,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,512,7168,0,0,1946.2413,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,65536,7168,256,16,0,2331.7635,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,98304,2112,7168,2,0,14838.37,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,7168,16384,0,0,105519.4606,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,128,7168,0,0,869.1726,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,8192,1536,0,0,11517.8223,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,2240,7168,18,0,15696.2548,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,98304,3072,1536,0,0,4372.9543,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,4096,512,3,0,3142.5369,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,4608,7168,2,0,31224.8208,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,7168,2304,3,0,15786.6506,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,11264,1536,0,0,15928.1433,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,7168,2048,0,0,12615.4473,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,7168,18432,0,0,99003.4516,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,1536,7168,0,0,8187.0758,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,576,7168,2,0,4586.9954,a8w8_blockscale_1x128x128_256x64x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,4096,7168,0,0,21200.0175,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,512,7168,0,0,2849.7983,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,98304,7168,256,16,0,3479.2173,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,131072,2112,7168,18,0,19268.6081,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,131072,7168,16384,18,0,154106.4002,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,131072,128,7168,0,0,1131.8661,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,131072,8192,1536,0,0,15358.2359,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,131072,2240,7168,18,0,20374.7532,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,131072,3072,1536,3,0,6873.7457,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,131072,4096,512,16,0,4029.3703,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,131072,7168,2048,0,0,17225.8363,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,131072,7168,18432,0,0,131465.7679,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,131072,1536,7168,0,0,10881.6908,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,131072,576,7168,18,0,5376.424,a8w8_blockscale_1x128x128_256x64x64x256_16x16_32x32_16x16x1_16x16x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 +80,131072,4096,7168,0,0,28332.6632,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,131072,512,7168,0,0,3803.5469,a8w8_blockscale_1x128x128_256x128x128x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v3,0,0,0 +80,131072,7168,256,16,0,4668.3753,a8w8_blockscale_1x128x128_256x64x64x128_16x16_32x32_8x32x1_8x32x1_1x32x1x8_8_1x1_intrawave_v1,0,0,0 diff --git a/aiter/configs/model_configs/a8w8_bpreshuffle_tuned_gemm_dsv3.csv b/aiter/configs/model_configs/a8w8_bpreshuffle_tuned_gemm_dsv3.csv index 5496aa90d8..0461b68501 100644 --- a/aiter/configs/model_configs/a8w8_bpreshuffle_tuned_gemm_dsv3.csv +++ b/aiter/configs/model_configs/a8w8_bpreshuffle_tuned_gemm_dsv3.csv @@ -316,3 +316,490 @@ cu_num,M,N,K,q_dtype_w,kernelId,splitK,us,kernelName,tflops,bw,errRatio 80,131072,2112,7168,torch.float8_e4m3fnuz,93,0,10093.2233,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,3931.90,149.44,0 80,131072,2240,7168,torch.float8_e4m3fnuz,69,0,12728.0402,a8w8_bpreshuffle_256x128x160x128_16x16_16x16_8x32x1_8x32x1_1x64x1x4_8x8x1_2x1_intrawave_v3,3306.93,121.21,0 80,131072,11264,1536,torch.float8_e4m3fnuz,71,0,12851.8716,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,3529.05,246.77,0 +80,1,128,7168,torch.float8_e4m3fnuz,25,0,10.735,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v2,0,0,0 +80,1,576,7168,torch.float8_e4m3fnuz,10,0,11.2398,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,1536,7168,torch.float8_e4m3fnuz,10,0,11.721,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,3072,1536,torch.float8_e4m3fnuz,5,0,5.6562,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,4096,7168,torch.float8_e4m3fnuz,11,0,13.4343,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,7168,2048,torch.float8_e4m3fnuz,10,0,9.1526,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,7168,16384,torch.float8_e4m3fnuz,24,0,39.678,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,1,7168,18432,torch.float8_e4m3fnuz,10,0,42.556,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,8192,1536,torch.float8_e4m3fnuz,11,0,7.6538,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,24576,1536,torch.float8_e4m3fnuz,15,0,13.5093,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,32768,512,torch.float8_e4m3fnuz,9,0,9.7066,a8w8_bpreshuffle_128x16x32x128_16x16_16x16_8x16x1_8x16x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,32768,1536,torch.float8_e4m3fnuz,15,0,17.5611,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,1,36864,7168,torch.float8_e4m3fnuz,6,0,76.2189,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,2,128,7168,torch.float8_e4m3fnuz,10,0,10.1434,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,512,7168,torch.float8_e4m3fnuz,10,0,11.2907,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,576,7168,torch.float8_e4m3fnuz,10,0,11.5102,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,1536,7168,torch.float8_e4m3fnuz,10,0,12.1679,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,2240,7168,torch.float8_e4m3fnuz,10,0,12.1906,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,3072,1536,torch.float8_e4m3fnuz,11,0,5.911,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,4096,512,torch.float8_e4m3fnuz,23,0,4.4526,a8w8_bpreshuffle_128x16x32x128_16x16_16x16_8x16x1_8x16x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,2,4096,7168,torch.float8_e4m3fnuz,11,0,13.2939,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,4608,7168,torch.float8_e4m3fnuz,5,0,13.597,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,7168,256,torch.float8_e4m3fnuz,75,0,6.3266,a8w8_bpreshuffle_128x16x256x64_16x16_16x16_4x16x1_4x32x1_1x16x1x8_8x8x1_1x2_intrawave_v1,0,0,0 +80,2,7168,2048,torch.float8_e4m3fnuz,5,0,8.5166,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,7168,2304,torch.float8_e4m3fnuz,29,0,9.905,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v2,0,0,0 +80,2,7168,16384,torch.float8_e4m3fnuz,10,0,37.3783,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,7168,18432,torch.float8_e4m3fnuz,10,0,42.2924,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,8192,1536,torch.float8_e4m3fnuz,5,0,7.8006,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,11264,1536,torch.float8_e4m3fnuz,15,0,9.4898,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,24576,1536,torch.float8_e4m3fnuz,108,0,13.6777,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,32768,512,torch.float8_e4m3fnuz,9,0,9.513,a8w8_bpreshuffle_128x16x32x128_16x16_16x16_8x16x1_8x16x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,32768,1536,torch.float8_e4m3fnuz,108,0,17.4059,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,2,36864,7168,torch.float8_e4m3fnuz,20,0,78.3165,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v2,0,0,0 +80,4,128,7168,torch.float8_e4m3fnuz,10,0,10.0606,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,512,7168,torch.float8_e4m3fnuz,10,0,11.6471,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,576,7168,torch.float8_e4m3fnuz,10,0,11.6411,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,1536,7168,torch.float8_e4m3fnuz,10,0,12.2579,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,2240,7168,torch.float8_e4m3fnuz,10,0,12.3558,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,3072,1536,torch.float8_e4m3fnuz,11,0,5.8834,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,4096,512,torch.float8_e4m3fnuz,23,0,4.5342,a8w8_bpreshuffle_128x16x32x128_16x16_16x16_8x16x1_8x16x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,4,4096,7168,torch.float8_e4m3fnuz,5,0,13.5363,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,4608,7168,torch.float8_e4m3fnuz,5,0,13.6563,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,7168,256,torch.float8_e4m3fnuz,73,0,6.9558,a8w8_bpreshuffle_256x32x256x64_16x16_16x16_4x32x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,4,7168,2048,torch.float8_e4m3fnuz,108,0,8.5722,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,7168,2304,torch.float8_e4m3fnuz,108,0,9.9551,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,7168,16384,torch.float8_e4m3fnuz,10,0,37.6428,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,7168,18432,torch.float8_e4m3fnuz,10,0,42.1052,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,8192,1536,torch.float8_e4m3fnuz,6,0,8.0606,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,4,11264,1536,torch.float8_e4m3fnuz,10,0,9.667,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,24576,1536,torch.float8_e4m3fnuz,15,0,13.9333,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,4,32768,512,torch.float8_e4m3fnuz,23,0,9.1702,a8w8_bpreshuffle_128x16x32x128_16x16_16x16_8x16x1_8x16x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,4,32768,1536,torch.float8_e4m3fnuz,109,0,18.3707,a8w8_bpreshuffle_256x16x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,4,36864,7168,torch.float8_e4m3fnuz,32,0,78.8657,a8w8_bpreshuffle_256x16x512x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v2,0,0,0 +80,8,128,7168,torch.float8_e4m3fnuz,10,0,10.4198,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,512,7168,torch.float8_e4m3fnuz,10,0,11.7575,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,576,7168,torch.float8_e4m3fnuz,10,0,11.871,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,1536,7168,torch.float8_e4m3fnuz,10,0,12.559,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,2240,7168,torch.float8_e4m3fnuz,10,0,12.6438,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,3072,1536,torch.float8_e4m3fnuz,10,0,5.8254,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,4096,512,torch.float8_e4m3fnuz,9,0,4.1858,a8w8_bpreshuffle_128x16x32x128_16x16_16x16_8x16x1_8x16x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,4096,7168,torch.float8_e4m3fnuz,11,0,13.6103,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,4608,7168,torch.float8_e4m3fnuz,5,0,13.8099,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,7168,256,torch.float8_e4m3fnuz,75,0,6.2842,a8w8_bpreshuffle_128x16x256x64_16x16_16x16_4x16x1_4x32x1_1x16x1x8_8x8x1_1x2_intrawave_v1,0,0,0 +80,8,7168,2048,torch.float8_e4m3fnuz,24,0,8.685,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,8,7168,2304,torch.float8_e4m3fnuz,108,0,10.0038,a8w8_bpreshuffle_256x16x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,7168,16384,torch.float8_e4m3fnuz,10,0,39.2255,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,7168,18432,torch.float8_e4m3fnuz,24,0,45.0508,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,8,8192,1536,torch.float8_e4m3fnuz,5,0,8.1558,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,11264,1536,torch.float8_e4m3fnuz,10,0,10.0366,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,24576,1536,torch.float8_e4m3fnuz,9,0,14.5085,a8w8_bpreshuffle_128x16x32x128_16x16_16x16_8x16x1_8x16x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,32768,512,torch.float8_e4m3fnuz,9,0,9.9258,a8w8_bpreshuffle_128x16x32x128_16x16_16x16_8x16x1_8x16x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,32768,1536,torch.float8_e4m3fnuz,5,0,18.2531,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,8,36864,7168,torch.float8_e4m3fnuz,111,0,79.5469,a8w8_bpreshuffle_256x16x512x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,16,128,7168,torch.float8_e4m3fnuz,19,0,10.0946,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v2,0,0,0 +80,16,4096,7168,torch.float8_e4m3fnuz,11,0,13.1683,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,16,7168,16384,torch.float8_e4m3fnuz,6,0,42.1056,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,16,7168,18432,torch.float8_e4m3fnuz,6,0,45.8424,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,16,8192,1536,torch.float8_e4m3fnuz,6,0,7.9894,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,16,24576,1536,torch.float8_e4m3fnuz,5,0,15.7929,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,16,32768,512,torch.float8_e4m3fnuz,9,0,10.2342,a8w8_bpreshuffle_128x16x32x128_16x16_16x16_8x16x1_8x16x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,16,32768,1536,torch.float8_e4m3fnuz,5,0,19.2975,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,16,36864,7168,torch.float8_e4m3fnuz,6,0,80.7001,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,32,128,7168,torch.float8_e4m3fnuz,10,0,10.0298,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,32,4096,7168,torch.float8_e4m3fnuz,12,0,18.5983,a8w8_bpreshuffle_256x32x64x512_16x16_16x16_32x8x1_32x8x1_1x32x1x8_8x8x1_1x2_intrawave_v1,0,0,0 +80,32,7168,18432,torch.float8_e4m3fnuz,119,0,58.5048,a8w8_bpreshuffle_256x32x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,32,8192,1536,torch.float8_e4m3fnuz,112,0,10.3018,a8w8_bpreshuffle_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,32,32768,512,torch.float8_e4m3fnuz,76,0,12.0918,a8w8_bpreshuffle_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,32,32768,1536,torch.float8_e4m3fnuz,119,0,24.3999,a8w8_bpreshuffle_256x32x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,32,36864,7168,torch.float8_e4m3fnuz,133,0,93.517,a8w8_bpreshuffle_256x32x256x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,64,128,7168,torch.float8_e4m3fnuz,24,0,9.9198,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,64,4096,7168,torch.float8_e4m3fnuz,114,0,23.8807,a8w8_bpreshuffle_256x64x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,64,7168,18432,torch.float8_e4m3fnuz,121,0,89.7754,a8w8_bpreshuffle_256x64x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,64,8192,1536,torch.float8_e4m3fnuz,114,0,12.7978,a8w8_bpreshuffle_256x64x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,64,32768,512,torch.float8_e4m3fnuz,85,0,18.7478,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,64,32768,1536,torch.float8_e4m3fnuz,101,0,34.2699,a8w8_bpreshuffle_256x64x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,64,36864,7168,torch.float8_e4m3fnuz,121,0,129.978,a8w8_bpreshuffle_256x64x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,96,128,7168,torch.float8_e4m3fnuz,25,0,10.3566,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v2,0,0,0 +80,96,576,7168,torch.float8_e4m3fnuz,10,0,12.065,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,96,1536,7168,torch.float8_e4m3fnuz,6,0,18.0262,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,96,4096,7168,torch.float8_e4m3fnuz,120,0,29.6988,a8w8_bpreshuffle_256x48x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,96,7168,18432,torch.float8_e4m3fnuz,113,0,109.8187,a8w8_bpreshuffle_256x48x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,96,8192,1536,torch.float8_e4m3fnuz,120,0,16.253,a8w8_bpreshuffle_256x48x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,96,32768,512,torch.float8_e4m3fnuz,84,0,23.8023,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,96,32768,1536,torch.float8_e4m3fnuz,102,0,43.5312,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,96,36864,7168,torch.float8_e4m3fnuz,102,0,155.5081,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,128,128,7168,torch.float8_e4m3fnuz,10,0,9.909,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,128,4096,7168,torch.float8_e4m3fnuz,121,0,38.8588,a8w8_bpreshuffle_256x64x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,128,7168,18432,torch.float8_e4m3fnuz,114,0,133.6804,a8w8_bpreshuffle_256x64x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,128,8192,1536,torch.float8_e4m3fnuz,121,0,19.1559,a8w8_bpreshuffle_256x64x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,128,32768,512,torch.float8_e4m3fnuz,85,0,28.3083,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,128,32768,1536,torch.float8_e4m3fnuz,85,0,54.7332,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,128,36864,7168,torch.float8_e4m3fnuz,93,0,201.1007,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,160,128,7168,torch.float8_e4m3fnuz,10,0,10.2346,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,160,512,7168,torch.float8_e4m3fnuz,25,0,12.1467,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v2,0,0,0 +80,160,576,7168,torch.float8_e4m3fnuz,10,0,16.0783,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,160,1536,7168,torch.float8_e4m3fnuz,119,0,24.4126,a8w8_bpreshuffle_256x32x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,160,2240,7168,torch.float8_e4m3fnuz,115,0,28.9759,a8w8_bpreshuffle_256x80x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v3,0,0,0 +80,160,4096,512,torch.float8_e4m3fnuz,84,0,7.7118,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,160,4096,7168,torch.float8_e4m3fnuz,119,0,40.6369,a8w8_bpreshuffle_256x32x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,160,4608,7168,torch.float8_e4m3fnuz,122,0,46.244,a8w8_bpreshuffle_256x80x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,160,7168,256,torch.float8_e4m3fnuz,75,0,10.251,a8w8_bpreshuffle_128x16x256x64_16x16_16x16_4x16x1_4x32x1_1x16x1x8_8x8x1_1x2_intrawave_v1,0,0,0 +80,160,7168,2304,torch.float8_e4m3fnuz,119,0,27.2747,a8w8_bpreshuffle_256x32x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,160,7168,18432,torch.float8_e4m3fnuz,136,0,168.4653,a8w8_bpreshuffle_256x80x256x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,160,8192,1536,torch.float8_e4m3fnuz,100,0,21.719,a8w8_bpreshuffle_256x32x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,160,11264,1536,torch.float8_e4m3fnuz,100,0,30.1239,a8w8_bpreshuffle_256x32x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,160,32768,512,torch.float8_e4m3fnuz,84,0,34.3591,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,160,32768,1536,torch.float8_e4m3fnuz,100,0,73.0357,a8w8_bpreshuffle_256x32x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,160,36864,7168,torch.float8_e4m3fnuz,156,0,267.3258,a8w8_bpreshuffle_256x160x256x128_16x16_16x16_8x32x1_8x32x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,192,128,7168,torch.float8_e4m3fnuz,10,0,10.2034,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,192,512,7168,torch.float8_e4m3fnuz,10,0,16.0911,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,192,576,7168,torch.float8_e4m3fnuz,10,0,17.0079,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,192,1536,7168,torch.float8_e4m3fnuz,114,0,24.0194,a8w8_bpreshuffle_256x64x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,192,4096,512,torch.float8_e4m3fnuz,84,0,9.9422,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,192,4096,7168,torch.float8_e4m3fnuz,123,0,49.1457,a8w8_bpreshuffle_256x96x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,192,4608,7168,torch.float8_e4m3fnuz,128,0,47.9264,a8w8_bpreshuffle_256x64x192x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,192,7168,256,torch.float8_e4m3fnuz,73,0,10.5659,a8w8_bpreshuffle_256x32x256x64_16x16_16x16_4x32x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,192,7168,2304,torch.float8_e4m3fnuz,120,0,29.5015,a8w8_bpreshuffle_256x48x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,192,7168,18432,torch.float8_e4m3fnuz,120,0,181.5802,a8w8_bpreshuffle_256x48x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,192,8192,1536,torch.float8_e4m3fnuz,86,0,24.7435,a8w8_bpreshuffle_256x96x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,192,32768,512,torch.float8_e4m3fnuz,85,0,37.0759,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,192,32768,1536,torch.float8_e4m3fnuz,85,0,73.3389,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,192,36864,7168,torch.float8_e4m3fnuz,94,0,286.0543,a8w8_bpreshuffle_256x96x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,224,128,7168,torch.float8_e4m3fnuz,24,0,10.2858,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,224,512,7168,torch.float8_e4m3fnuz,19,0,17.1051,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v2,0,0,0 +80,224,576,7168,torch.float8_e4m3fnuz,25,0,17.1379,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v2,0,0,0 +80,224,1536,7168,torch.float8_e4m3fnuz,115,0,31.7078,a8w8_bpreshuffle_256x80x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v3,0,0,0 +80,224,2240,7168,torch.float8_e4m3fnuz,117,0,37.5099,a8w8_bpreshuffle_256x112x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v3,0,0,0 +80,224,4096,512,torch.float8_e4m3fnuz,76,0,10.187,a8w8_bpreshuffle_256x32x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,224,4096,7168,torch.float8_e4m3fnuz,120,0,52.8765,a8w8_bpreshuffle_256x48x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,224,4608,7168,torch.float8_e4m3fnuz,124,0,56.3824,a8w8_bpreshuffle_256x112x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,224,7168,256,torch.float8_e4m3fnuz,73,0,10.9003,a8w8_bpreshuffle_256x32x256x64_16x16_16x16_4x32x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,224,7168,2304,torch.float8_e4m3fnuz,85,0,32.8835,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,224,7168,18432,torch.float8_e4m3fnuz,85,0,207.6011,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,224,8192,1536,torch.float8_e4m3fnuz,100,0,28.8459,a8w8_bpreshuffle_256x32x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,224,11264,1536,torch.float8_e4m3fnuz,100,0,37.6656,a8w8_bpreshuffle_256x32x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,224,32768,512,torch.float8_e4m3fnuz,84,0,45.0439,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,224,32768,1536,torch.float8_e4m3fnuz,85,0,91.0766,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,224,36864,7168,torch.float8_e4m3fnuz,40,0,377.2671,a8w8_bpreshuffle_256x224x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,256,128,7168,torch.float8_e4m3fnuz,10,0,10.8778,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,256,4096,7168,torch.float8_e4m3fnuz,85,0,60.0334,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,256,7168,18432,torch.float8_e4m3fnuz,85,0,209.5107,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,256,8192,1536,torch.float8_e4m3fnuz,85,0,30.3059,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,256,32768,512,torch.float8_e4m3fnuz,85,0,46.3119,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,256,32768,1536,torch.float8_e4m3fnuz,85,0,92.2438,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,256,36864,7168,torch.float8_e4m3fnuz,68,0,371.2046,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,288,128,7168,torch.float8_e4m3fnuz,10,0,10.7298,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,288,512,7168,torch.float8_e4m3fnuz,6,0,17.6475,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,288,576,7168,torch.float8_e4m3fnuz,113,0,21.2123,a8w8_bpreshuffle_256x48x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,288,1536,7168,torch.float8_e4m3fnuz,120,0,29.6846,a8w8_bpreshuffle_256x48x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,288,2240,7168,torch.float8_e4m3fnuz,113,0,44.5,a8w8_bpreshuffle_256x48x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,288,4096,512,torch.float8_e4m3fnuz,85,0,10.8502,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,288,4096,7168,torch.float8_e4m3fnuz,85,0,62.9998,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,288,4608,7168,torch.float8_e4m3fnuz,130,0,66.7012,a8w8_bpreshuffle_256x96x192x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,288,7168,256,torch.float8_e4m3fnuz,72,0,12.7115,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,288,7168,2304,torch.float8_e4m3fnuz,85,0,40.9768,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,288,7168,18432,torch.float8_e4m3fnuz,85,0,272.1414,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,288,8192,1536,torch.float8_e4m3fnuz,85,0,32.0963,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,288,11264,1536,torch.float8_e4m3fnuz,102,0,42.7952,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,288,32768,512,torch.float8_e4m3fnuz,84,0,53.506,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,288,32768,1536,torch.float8_e4m3fnuz,102,0,101.9306,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,288,36864,7168,torch.float8_e4m3fnuz,102,0,437.3066,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,320,128,7168,torch.float8_e4m3fnuz,24,0,10.9094,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,320,512,7168,torch.float8_e4m3fnuz,6,0,17.8315,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,320,576,7168,torch.float8_e4m3fnuz,112,0,22.5067,a8w8_bpreshuffle_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,320,1536,7168,torch.float8_e4m3fnuz,126,0,32.1157,a8w8_bpreshuffle_256x32x192x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,320,2240,7168,torch.float8_e4m3fnuz,78,0,53.8992,a8w8_bpreshuffle_256x96x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,320,4096,512,torch.float8_e4m3fnuz,84,0,10.783,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,320,4096,7168,torch.float8_e4m3fnuz,121,0,63.2822,a8w8_bpreshuffle_256x64x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,320,4608,7168,torch.float8_e4m3fnuz,136,0,72.1189,a8w8_bpreshuffle_256x80x256x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,320,7168,256,torch.float8_e4m3fnuz,74,0,13.0231,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,320,7168,2304,torch.float8_e4m3fnuz,85,0,40.2392,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,320,7168,18432,torch.float8_e4m3fnuz,101,0,270.4958,a8w8_bpreshuffle_256x64x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,320,8192,1536,torch.float8_e4m3fnuz,101,0,32.6299,a8w8_bpreshuffle_256x64x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,320,11264,1536,torch.float8_e4m3fnuz,85,0,45.0808,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,320,32768,512,torch.float8_e4m3fnuz,85,0,55.0543,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,320,32768,1536,torch.float8_e4m3fnuz,85,0,114.3911,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,320,36864,7168,torch.float8_e4m3fnuz,128,0,476.2091,a8w8_bpreshuffle_256x64x192x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,352,128,7168,torch.float8_e4m3fnuz,10,0,11.2254,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,352,512,7168,torch.float8_e4m3fnuz,112,0,22.2763,a8w8_bpreshuffle_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,352,576,7168,torch.float8_e4m3fnuz,112,0,22.5607,a8w8_bpreshuffle_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,352,1536,7168,torch.float8_e4m3fnuz,133,0,39.9737,a8w8_bpreshuffle_256x32x256x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,352,2240,7168,torch.float8_e4m3fnuz,78,0,54.0168,a8w8_bpreshuffle_256x96x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,352,4096,512,torch.float8_e4m3fnuz,84,0,12.0603,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,352,4096,7168,torch.float8_e4m3fnuz,136,0,74.9351,a8w8_bpreshuffle_256x80x256x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,352,4608,7168,torch.float8_e4m3fnuz,128,0,84.2541,a8w8_bpreshuffle_256x64x192x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,352,7168,256,torch.float8_e4m3fnuz,75,0,15.1695,a8w8_bpreshuffle_128x16x256x64_16x16_16x16_4x16x1_4x32x1_1x16x1x8_8x8x1_1x2_intrawave_v1,0,0,0 +80,352,7168,2304,torch.float8_e4m3fnuz,100,0,49.1372,a8w8_bpreshuffle_256x32x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,352,7168,18432,torch.float8_e4m3fnuz,86,0,324.6944,a8w8_bpreshuffle_256x96x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,352,8192,1536,torch.float8_e4m3fnuz,85,0,38.7583,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,352,11264,1536,torch.float8_e4m3fnuz,85,0,51.422,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,352,32768,512,torch.float8_e4m3fnuz,72,0,64.3624,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,352,32768,1536,torch.float8_e4m3fnuz,71,0,132.552,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,352,36864,7168,torch.float8_e4m3fnuz,71,0,568.6663,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,128,7168,torch.float8_e4m3fnuz,10,0,11.3822,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,384,512,7168,torch.float8_e4m3fnuz,113,0,21.2655,a8w8_bpreshuffle_256x48x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,384,576,7168,torch.float8_e4m3fnuz,113,0,21.2619,a8w8_bpreshuffle_256x48x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,384,1536,7168,torch.float8_e4m3fnuz,114,0,37.6905,a8w8_bpreshuffle_256x64x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,2240,7168,torch.float8_e4m3fnuz,78,0,53.062,a8w8_bpreshuffle_256x96x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,4096,512,torch.float8_e4m3fnuz,84,0,12.2599,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,384,4096,7168,torch.float8_e4m3fnuz,86,0,81.866,a8w8_bpreshuffle_256x96x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,4608,7168,torch.float8_e4m3fnuz,93,0,81.9457,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,7168,256,torch.float8_e4m3fnuz,72,0,15.1515,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,384,7168,2304,torch.float8_e4m3fnuz,86,0,48.0036,a8w8_bpreshuffle_256x96x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,7168,18432,torch.float8_e4m3fnuz,86,0,300.0663,a8w8_bpreshuffle_256x96x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,8192,1536,torch.float8_e4m3fnuz,85,0,38.4623,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,11264,1536,torch.float8_e4m3fnuz,85,0,51.3588,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,32768,512,torch.float8_e4m3fnuz,85,0,63.438,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,32768,1536,torch.float8_e4m3fnuz,71,0,132.9488,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,384,36864,7168,torch.float8_e4m3fnuz,94,0,555.2415,a8w8_bpreshuffle_256x96x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,128,7168,torch.float8_e4m3fnuz,24,0,11.6798,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v2,0,0,0 +80,416,512,7168,torch.float8_e4m3fnuz,112,0,22.5591,a8w8_bpreshuffle_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,416,576,7168,torch.float8_e4m3fnuz,112,0,22.8351,a8w8_bpreshuffle_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,416,1536,7168,torch.float8_e4m3fnuz,119,0,38.3401,a8w8_bpreshuffle_256x32x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,416,2240,7168,torch.float8_e4m3fnuz,113,0,62.1588,a8w8_bpreshuffle_256x48x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,416,3072,1536,torch.float8_e4m3fnuz,100,0,20.7247,a8w8_bpreshuffle_256x32x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,416,4096,512,torch.float8_e4m3fnuz,85,0,13.5035,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,4096,7168,torch.float8_e4m3fnuz,85,0,86.134,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,4608,7168,torch.float8_e4m3fnuz,138,0,101.4186,a8w8_bpreshuffle_256x112x256x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,416,7168,256,torch.float8_e4m3fnuz,74,0,15.9059,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,416,7168,2048,torch.float8_e4m3fnuz,85,0,48.1552,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,7168,2304,torch.float8_e4m3fnuz,85,0,51.2564,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,7168,16384,torch.float8_e4m3fnuz,85,0,310.2738,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,7168,18432,torch.float8_e4m3fnuz,85,0,352.9218,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,8192,1536,torch.float8_e4m3fnuz,72,0,43.8675,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,416,11264,1536,torch.float8_e4m3fnuz,85,0,58.2264,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,24576,1536,torch.float8_e4m3fnuz,102,0,122.5387,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,32768,512,torch.float8_e4m3fnuz,84,0,73.9008,a8w8_bpreshuffle_256x32x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,416,32768,1536,torch.float8_e4m3fnuz,85,0,155.3704,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,416,36864,7168,torch.float8_e4m3fnuz,93,0,649.7863,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,128,7168,torch.float8_e4m3fnuz,10,0,11.523,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,448,512,7168,torch.float8_e4m3fnuz,112,0,22.7188,a8w8_bpreshuffle_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,448,576,7168,torch.float8_e4m3fnuz,112,0,22.8423,a8w8_bpreshuffle_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,448,1536,7168,torch.float8_e4m3fnuz,113,0,46.246,a8w8_bpreshuffle_256x48x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,448,2240,7168,torch.float8_e4m3fnuz,114,0,70.1253,a8w8_bpreshuffle_256x64x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,3072,1536,torch.float8_e4m3fnuz,92,0,22.3115,a8w8_bpreshuffle_256x32x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,448,4096,512,torch.float8_e4m3fnuz,85,0,13.3723,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,4096,7168,torch.float8_e4m3fnuz,85,0,84.5872,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,4608,7168,torch.float8_e4m3fnuz,138,0,97.0854,a8w8_bpreshuffle_256x112x256x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,448,7168,256,torch.float8_e4m3fnuz,72,0,15.6539,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,448,7168,2048,torch.float8_e4m3fnuz,85,0,47.8488,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,7168,2304,torch.float8_e4m3fnuz,85,0,49.72,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,7168,16384,torch.float8_e4m3fnuz,85,0,316.355,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,7168,18432,torch.float8_e4m3fnuz,85,0,356.7658,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,8192,1536,torch.float8_e4m3fnuz,72,0,43.8247,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,448,11264,1536,torch.float8_e4m3fnuz,85,0,57.7888,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,24576,1536,torch.float8_e4m3fnuz,93,0,117.3539,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,32768,512,torch.float8_e4m3fnuz,85,0,72.2692,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,32768,1536,torch.float8_e4m3fnuz,85,0,153.3388,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,448,36864,7168,torch.float8_e4m3fnuz,93,0,631.7091,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,128,7168,torch.float8_e4m3fnuz,10,0,11.607,a8w8_bpreshuffle_128x16x32x512_16x16_16x16_32x4x1_32x4x1_1x16x1x8_4x4x1_1x1_intrawave_v1,0,0,0 +80,480,512,7168,torch.float8_e4m3fnuz,113,0,21.5703,a8w8_bpreshuffle_256x48x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,480,576,7168,torch.float8_e4m3fnuz,112,0,24.5819,a8w8_bpreshuffle_256x32x64x256_16x16_16x16_16x16x1_16x16x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,480,1536,7168,torch.float8_e4m3fnuz,113,0,45.4048,a8w8_bpreshuffle_256x48x64x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,480,2240,7168,torch.float8_e4m3fnuz,62,0,70.4585,a8w8_bpreshuffle_256x128x64x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,3072,1536,torch.float8_e4m3fnuz,94,0,22.3915,a8w8_bpreshuffle_256x96x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,4096,512,torch.float8_e4m3fnuz,86,0,14.0771,a8w8_bpreshuffle_256x96x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,4096,7168,torch.float8_e4m3fnuz,86,0,82.8028,a8w8_bpreshuffle_256x96x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,4608,7168,torch.float8_e4m3fnuz,56,0,105.9346,a8w8_bpreshuffle_256x160x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,7168,256,torch.float8_e4m3fnuz,72,0,16.1387,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,480,7168,2048,torch.float8_e4m3fnuz,102,0,52.4824,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,7168,2304,torch.float8_e4m3fnuz,102,0,56.9516,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,7168,16384,torch.float8_e4m3fnuz,86,0,346.0471,a8w8_bpreshuffle_256x96x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,7168,18432,torch.float8_e4m3fnuz,102,0,379.6631,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,8192,1536,torch.float8_e4m3fnuz,102,0,43.0079,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,11264,1536,torch.float8_e4m3fnuz,102,0,64.1117,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,24576,1536,torch.float8_e4m3fnuz,102,0,119.4687,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,32768,512,torch.float8_e4m3fnuz,102,0,81.2012,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,32768,1536,torch.float8_e4m3fnuz,102,0,155.1241,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,480,36864,7168,torch.float8_e4m3fnuz,102,0,655.0492,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,512,128,7168,torch.float8_e4m3fnuz,11,0,12.1131,a8w8_bpreshuffle_256x16x64x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_4x4x1_1x1_intrawave_v1,0,0,0 +80,512,4096,7168,torch.float8_e4m3fnuz,138,0,102.7541,a8w8_bpreshuffle_256x112x256x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,512,7168,16384,torch.float8_e4m3fnuz,71,0,346.2792,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,512,7168,18432,torch.float8_e4m3fnuz,70,0,389.6687,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,512,8192,1536,torch.float8_e4m3fnuz,85,0,50.0471,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,512,24576,1536,torch.float8_e4m3fnuz,93,0,126.7363,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,512,32768,512,torch.float8_e4m3fnuz,85,0,82.7804,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,512,32768,1536,torch.float8_e4m3fnuz,71,0,167.8113,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,512,36864,7168,torch.float8_e4m3fnuz,68,0,726.0635,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,1024,128,7168,torch.float8_e4m3fnuz,6,0,17.8979,a8w8_bpreshuffle_256x16x128x512_16x16_16x16_32x8x1_32x8x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,1024,4096,7168,torch.float8_e4m3fnuz,85,0,179.0322,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,1024,7168,16384,torch.float8_e4m3fnuz,85,0,661.406,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,1024,7168,18432,torch.float8_e4m3fnuz,85,0,745.9611,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,1024,8192,1536,torch.float8_e4m3fnuz,71,0,90.3089,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,1024,24576,1536,torch.float8_e4m3fnuz,68,0,241.8128,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,1024,32768,512,torch.float8_e4m3fnuz,71,0,148.0686,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,1024,32768,1536,torch.float8_e4m3fnuz,71,0,317.4319,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,1024,36864,7168,torch.float8_e4m3fnuz,93,0,1406.3674,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,2048,128,7168,torch.float8_e4m3fnuz,119,0,24.0187,a8w8_bpreshuffle_256x32x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,2048,4096,7168,torch.float8_e4m3fnuz,85,0,328.802,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,2048,7168,16384,torch.float8_e4m3fnuz,85,0,1256.6524,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,2048,7168,18432,torch.float8_e4m3fnuz,85,0,1411.2909,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,2048,8192,1536,torch.float8_e4m3fnuz,71,0,160.9323,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,2048,24576,1536,torch.float8_e4m3fnuz,71,0,459.7917,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,2048,32768,512,torch.float8_e4m3fnuz,71,0,283.8954,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,2048,32768,1536,torch.float8_e4m3fnuz,71,0,610.8983,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,2048,36864,7168,torch.float8_e4m3fnuz,93,0,2759.912,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,4096,128,7168,torch.float8_e4m3fnuz,121,0,38.9415,a8w8_bpreshuffle_256x64x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,4096,4096,7168,torch.float8_e4m3fnuz,85,0,645.3373,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,4096,7168,16384,torch.float8_e4m3fnuz,85,0,2443.998,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,4096,7168,18432,torch.float8_e4m3fnuz,85,0,2773.2274,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,4096,8192,1536,torch.float8_e4m3fnuz,71,0,307.3715,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,4096,24576,1536,torch.float8_e4m3fnuz,72,0,920.9012,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,4096,32768,512,torch.float8_e4m3fnuz,71,0,554.3222,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,4096,32768,1536,torch.float8_e4m3fnuz,71,0,1193.686,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,4096,36864,7168,torch.float8_e4m3fnuz,93,0,5492.6804,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,128,7168,torch.float8_e4m3fnuz,123,0,49.1848,a8w8_bpreshuffle_256x96x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,6144,512,7168,torch.float8_e4m3fnuz,85,0,143.4871,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,576,7168,torch.float8_e4m3fnuz,68,0,151.638,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,1536,7168,torch.float8_e4m3fnuz,93,0,368.8956,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,2240,7168,torch.float8_e4m3fnuz,69,0,662.2802,a8w8_bpreshuffle_256x128x160x128_16x16_16x16_8x32x1_8x32x1_1x64x1x4_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,3072,1536,torch.float8_e4m3fnuz,85,0,183.0209,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,4096,512,torch.float8_e4m3fnuz,71,0,109.7738,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,4096,7168,torch.float8_e4m3fnuz,102,0,937.0884,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,4608,7168,torch.float8_e4m3fnuz,102,0,1071.1237,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,7168,256,torch.float8_e4m3fnuz,71,0,123.4161,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,7168,2048,torch.float8_e4m3fnuz,71,0,509.0789,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,7168,2304,torch.float8_e4m3fnuz,71,0,560.4929,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,7168,16384,torch.float8_e4m3fnuz,85,0,3671.969,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,7168,18432,torch.float8_e4m3fnuz,85,0,4139.4899,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,8192,1536,torch.float8_e4m3fnuz,71,0,455.0275,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,11264,1536,torch.float8_e4m3fnuz,71,0,614.8496,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,24576,1536,torch.float8_e4m3fnuz,71,0,1348.8178,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,32768,512,torch.float8_e4m3fnuz,71,0,793.4262,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,32768,1536,torch.float8_e4m3fnuz,102,0,1794.6908,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,6144,36864,7168,torch.float8_e4m3fnuz,93,0,8229.8778,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,8192,128,7168,torch.float8_e4m3fnuz,124,0,61.8924,a8w8_bpreshuffle_256x112x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,8192,4096,7168,torch.float8_e4m3fnuz,85,0,1259.5274,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,8192,7168,16384,torch.float8_e4m3fnuz,85,0,4942.7227,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,8192,7168,18432,torch.float8_e4m3fnuz,85,0,5501.8021,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,8192,8192,1536,torch.float8_e4m3fnuz,72,0,621.3308,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,8192,24576,1536,torch.float8_e4m3fnuz,71,0,1786.9965,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,8192,32768,512,torch.float8_e4m3fnuz,71,0,1053.245,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,8192,32768,1536,torch.float8_e4m3fnuz,71,0,2345.8711,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,8192,36864,7168,torch.float8_e4m3fnuz,93,0,10983.7146,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,128,7168,torch.float8_e4m3fnuz,121,0,63.2764,a8w8_bpreshuffle_256x64x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,10240,512,7168,torch.float8_e4m3fnuz,0,0,211.3264,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,576,7168,torch.float8_e4m3fnuz,68,0,230.3376,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,1536,7168,torch.float8_e4m3fnuz,68,0,583.4998,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,2240,7168,torch.float8_e4m3fnuz,69,0,1018.0438,a8w8_bpreshuffle_256x128x160x128_16x16_16x16_8x32x1_8x32x1_1x64x1x4_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,3072,1536,torch.float8_e4m3fnuz,71,0,290.0173,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,4096,512,torch.float8_e4m3fnuz,71,0,173.8713,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,4096,7168,torch.float8_e4m3fnuz,85,0,1574.2695,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,4608,7168,torch.float8_e4m3fnuz,93,0,1722.172,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,7168,256,torch.float8_e4m3fnuz,71,0,199.187,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,7168,2048,torch.float8_e4m3fnuz,71,0,843.0746,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,7168,2304,torch.float8_e4m3fnuz,71,0,913.7132,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,7168,16384,torch.float8_e4m3fnuz,85,0,6072.4257,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,7168,18432,torch.float8_e4m3fnuz,85,0,6834.6666,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,8192,1536,torch.float8_e4m3fnuz,71,0,754.6592,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,11264,1536,torch.float8_e4m3fnuz,71,0,1039.4493,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,24576,1536,torch.float8_e4m3fnuz,71,0,2209.3759,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,32768,512,torch.float8_e4m3fnuz,72,0,1357.7711,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,10240,32768,1536,torch.float8_e4m3fnuz,71,0,2923.8951,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,10240,36864,7168,torch.float8_e4m3fnuz,93,0,13738.3404,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,128,7168,torch.float8_e4m3fnuz,141,0,84.5069,a8w8_bpreshuffle_256x160x128x128_16x16_16x16_8x32x1_8x32x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,12288,512,7168,torch.float8_e4m3fnuz,0,0,262.6171,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,576,7168,torch.float8_e4m3fnuz,94,0,285.6522,a8w8_bpreshuffle_256x96x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,1536,7168,torch.float8_e4m3fnuz,94,0,718.7933,a8w8_bpreshuffle_256x96x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,2240,7168,torch.float8_e4m3fnuz,69,0,1231.0498,a8w8_bpreshuffle_256x128x160x128_16x16_16x16_8x32x1_8x32x1_1x64x1x4_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,3072,1536,torch.float8_e4m3fnuz,72,0,352.0628,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,12288,4096,512,torch.float8_e4m3fnuz,71,0,207.219,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,4096,7168,torch.float8_e4m3fnuz,102,0,1858.9016,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,4608,7168,torch.float8_e4m3fnuz,68,0,2079.1883,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,7168,256,torch.float8_e4m3fnuz,71,0,243.398,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,7168,2048,torch.float8_e4m3fnuz,71,0,1001.5776,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,7168,2304,torch.float8_e4m3fnuz,72,0,1134.7441,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,12288,7168,16384,torch.float8_e4m3fnuz,102,0,7272.8109,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,7168,18432,torch.float8_e4m3fnuz,102,0,8054.046,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,8192,1536,torch.float8_e4m3fnuz,102,0,912.1361,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,11264,1536,torch.float8_e4m3fnuz,71,0,1221.6133,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,24576,1536,torch.float8_e4m3fnuz,71,0,2656.0688,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,32768,512,torch.float8_e4m3fnuz,71,0,1573.8691,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,32768,1536,torch.float8_e4m3fnuz,71,0,3526.7572,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,12288,36864,7168,torch.float8_e4m3fnuz,93,0,16426.5845,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,128,7168,torch.float8_e4m3fnuz,86,0,91.5501,a8w8_bpreshuffle_256x96x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,512,7168,torch.float8_e4m3fnuz,102,0,306.9194,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,576,7168,torch.float8_e4m3fnuz,93,0,341.468,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,1536,7168,torch.float8_e4m3fnuz,93,0,830.4984,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,2240,7168,torch.float8_e4m3fnuz,69,0,1461.7946,a8w8_bpreshuffle_256x128x160x128_16x16_16x16_8x32x1_8x32x1_1x64x1x4_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,3072,1536,torch.float8_e4m3fnuz,93,0,405.8954,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,4096,512,torch.float8_e4m3fnuz,71,0,239.3595,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,4096,7168,torch.float8_e4m3fnuz,85,0,2197.8676,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,4608,7168,torch.float8_e4m3fnuz,93,0,2431.1066,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,7168,256,torch.float8_e4m3fnuz,72,0,297.7928,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,14336,7168,2048,torch.float8_e4m3fnuz,74,0,1199.9511,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,14336,7168,2304,torch.float8_e4m3fnuz,71,0,1310.3925,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,7168,16384,torch.float8_e4m3fnuz,85,0,8455.8581,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,7168,18432,torch.float8_e4m3fnuz,85,0,9585.3754,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,8192,1536,torch.float8_e4m3fnuz,71,0,1039.5469,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,11264,1536,torch.float8_e4m3fnuz,71,0,1422.2274,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,24576,1536,torch.float8_e4m3fnuz,71,0,3089.1315,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,32768,512,torch.float8_e4m3fnuz,71,0,1827.3845,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,32768,1536,torch.float8_e4m3fnuz,71,0,4108.8811,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,14336,36864,7168,torch.float8_e4m3fnuz,93,0,19165.8682,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,16384,128,7168,torch.float8_e4m3fnuz,0,0,114.6838,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,16384,4096,7168,torch.float8_e4m3fnuz,85,0,2498.7773,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,16384,7168,16384,torch.float8_e4m3fnuz,85,0,9679.208,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,16384,7168,18432,torch.float8_e4m3fnuz,85,0,10922.6859,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,16384,8192,1536,torch.float8_e4m3fnuz,71,0,1182.4869,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,16384,24576,1536,torch.float8_e4m3fnuz,71,0,3536.4999,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,16384,32768,512,torch.float8_e4m3fnuz,71,0,2090.8366,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,16384,32768,1536,torch.float8_e4m3fnuz,71,0,4672.9569,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,16384,36864,7168,torch.float8_e4m3fnuz,71,0,21975.6028,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,128,7168,torch.float8_e4m3fnuz,121,0,208.4886,a8w8_bpreshuffle_256x64x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,32768,576,7168,torch.float8_e4m3fnuz,68,0,729.7961,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,1536,7168,torch.float8_e4m3fnuz,68,0,1872.4274,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,3072,1536,torch.float8_e4m3fnuz,71,0,909.865,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,4096,7168,torch.float8_e4m3fnuz,74,0,5016.1101,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,32768,7168,2048,torch.float8_e4m3fnuz,72,0,2704.1764,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,32768,7168,16384,torch.float8_e4m3fnuz,85,0,19294.1288,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,7168,18432,torch.float8_e4m3fnuz,85,0,21856.5731,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,8192,1536,torch.float8_e4m3fnuz,71,0,2344.2068,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,24576,1536,torch.float8_e4m3fnuz,71,0,7017.8836,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,32768,512,torch.float8_e4m3fnuz,71,0,4165.6734,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,32768,1536,torch.float8_e4m3fnuz,71,0,9432.9791,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,32768,36864,7168,torch.float8_e4m3fnuz,93,0,43711.479,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,128,7168,torch.float8_e4m3fnuz,121,0,383.2232,a8w8_bpreshuffle_256x64x128x256_16x16_16x16_16x16x1_16x16x1_1x16x1x16_8x8x1_1x2_intrawave_v3,0,0,0 +80,65536,512,7168,torch.float8_e4m3fnuz,70,0,1327.0618,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,576,7168,torch.float8_e4m3fnuz,93,0,1429.9239,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,1536,7168,torch.float8_e4m3fnuz,93,0,3736.9156,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,3072,1536,torch.float8_e4m3fnuz,71,0,1787.3796,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,4096,512,torch.float8_e4m3fnuz,71,0,1055.1661,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,4096,7168,torch.float8_e4m3fnuz,85,0,9976.6212,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,4608,7168,torch.float8_e4m3fnuz,68,0,11016.6648,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,7168,256,torch.float8_e4m3fnuz,71,0,1212.7558,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,7168,2048,torch.float8_e4m3fnuz,71,0,5287.9696,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,7168,2304,torch.float8_e4m3fnuz,71,0,5871.5636,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,7168,16384,torch.float8_e4m3fnuz,85,0,38748.1812,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,7168,18432,torch.float8_e4m3fnuz,85,0,43467.0784,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,8192,1536,torch.float8_e4m3fnuz,71,0,4684.7126,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,24576,1536,torch.float8_e4m3fnuz,0,0,inf,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,32768,512,torch.float8_e4m3fnuz,74,0,,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v1,0,0,0 +80,65536,32768,1536,torch.float8_e4m3fnuz,0,0,,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,65536,36864,7168,torch.float8_e4m3fnuz,0,0,,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,128,7168,torch.float8_e4m3fnuz,87,0,561.5334,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x16x1x16_4x4x1_1x1_intrawave_v3,0,0,0 +80,98304,512,7168,torch.float8_e4m3fnuz,102,0,1866.4062,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,576,7168,torch.float8_e4m3fnuz,95,0,2112.9835,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x16x1x16_4x4x1_1x1_intrawave_v3,0,0,0 +80,98304,1536,7168,torch.float8_e4m3fnuz,102,0,5497.497,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,2240,7168,torch.float8_e4m3fnuz,69,0,9755.5399,a8w8_bpreshuffle_256x128x160x128_16x16_16x16_8x32x1_8x32x1_1x64x1x4_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,3072,1536,torch.float8_e4m3fnuz,102,0,2698.4705,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,4096,512,torch.float8_e4m3fnuz,71,0,1607.6098,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,4096,7168,torch.float8_e4m3fnuz,102,0,14573.0079,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,4608,7168,torch.float8_e4m3fnuz,102,0,16377.2032,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,7168,256,torch.float8_e4m3fnuz,72,0,1997.5301,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,98304,7168,2048,torch.float8_e4m3fnuz,72,0,8086.5215,a8w8_bpreshuffle_256x64x256x64_16x16_16x16_4x64x1_4x64x1_1x16x1x16_8x8x1_1x2_intrawave_v1,0,0,0 +80,98304,7168,2304,torch.float8_e4m3fnuz,71,0,8879.3992,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,7168,16384,torch.float8_e4m3fnuz,102,0,57025.1637,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,7168,18432,torch.float8_e4m3fnuz,102,0,63990.1906,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,8192,1536,torch.float8_e4m3fnuz,71,0,7034.6763,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,11264,1536,torch.float8_e4m3fnuz,71,0,9673.3902,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,24576,1536,torch.float8_e4m3fnuz,0,0,,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,98304,32768,1536,torch.float8_e4m3fnuz,0,0,,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,128,7168,torch.float8_e4m3fnuz,0,0,719.3472,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,512,7168,torch.float8_e4m3fnuz,102,0,2615.157,a8w8_bpreshuffle_256x96x256x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,576,7168,torch.float8_e4m3fnuz,93,0,2822.5197,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,1536,7168,torch.float8_e4m3fnuz,68,0,7429.8789,a8w8_bpreshuffle_256x128x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,3072,1536,torch.float8_e4m3fnuz,71,0,3559.9187,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,4096,512,torch.float8_e4m3fnuz,71,0,2125.6078,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,4096,7168,torch.float8_e4m3fnuz,85,0,19941.3998,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,4608,7168,torch.float8_e4m3fnuz,93,0,21987.1209,a8w8_bpreshuffle_256x64x192x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,7168,256,torch.float8_e4m3fnuz,71,0,2444.3347,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,7168,2048,torch.float8_e4m3fnuz,71,0,10519.7293,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,7168,2304,torch.float8_e4m3fnuz,71,0,11621.1373,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,7168,16384,torch.float8_e4m3fnuz,85,0,77358.0635,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,7168,18432,torch.float8_e4m3fnuz,85,0,87126.4572,a8w8_bpreshuffle_256x64x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,8192,1536,torch.float8_e4m3fnuz,71,0,9394.7156,a8w8_bpreshuffle_256x128x128x64_16x16_16x16_4x64x1_4x64x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0 +80,131072,24576,1536,torch.float8_e4m3fnuz,0,0,,a8w8_bpreshuffle_256x128x128x128_16x16_16x16_8x32x1_8x32x1_1x32x1x8_8x8x1_2x1_intrawave_v3,0,0,0