Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion aiter/configs/tuned_fmoe.csv
Original file line number Diff line number Diff line change
Expand Up @@ -764,4 +764,11 @@ cu_num,token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,
80,256,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,597.1745,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,373.7546,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,970.9291,0,46.45,2908.63
80,512,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,734.3088,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,509.0543,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1243.3631,0,72.54,2275.75
80,1024,7168,512,256,8,ActivationType.Gelu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,1,16,0,1040.5956,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_gelu_F8_F8_B16,0.0%,844.24,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_F8_F8_B16,16.7%,1884.8356,0,95.71,1507.08
80,56,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,228.7482,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.5%,0.0,Null,0.0%,228.7482,1,73.93,2644.88
80,56,6144,4096,8,2,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,228.7482,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.5%,0.0,Null,0.0%,228.7482,1,73.93,2644.88
80,16,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,245.0416,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,245.0416,1,6.47,5775.08
80,32,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,370.7841,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,370.7841,1,8.55,3817.53
80,64,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,428.9409,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,428.9409,1,14.78,3301.54
80,128,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,519.4664,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,519.4664,1,24.42,2728.85
80,256,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,536.7655,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,536.7655,1,47.26,2646.03
80,512,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,560.4425,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,560.4425,1,90.53,2544.06
80,1024,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,827.4898,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,827.4898,1,122.62,1736.35