Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions aiter/configs/tuned_fmoe.csv
Original file line number Diff line number Diff line change
Expand Up @@ -772,3 +772,17 @@ cu_num,token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w,
80,256,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,536.7655,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,536.7655,1,47.26,2646.03
80,512,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,560.4425,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,560.4425,1,90.53,2544.06
80,1024,7168,256,257,9,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,827.4898,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,0.0%,0.0,Null,0.0%,827.4898,1,122.62,1736.35
80,16,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,274.0603,_ZN5aiter59fmoe_stage1_bf16_pertokenFp8_blockscale_g1u1_16x256_2tg_pf3E,4.9%,150.3324,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.3%,424.3927,0,33.21,3425.3
80,32,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,16,0,359.0112,moe_ck2stages_gemm1_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight0_silu_F8_F8_B16,0.0%,190.8827,moe_ck2stages_gemm2_256x16x128x256_1x4_MulABScaleExpertWeightA8W8blkscale_v1_Nswizzle0_Quant4_MulRoutedWeight1_F8_F8_B16,0.2%,549.8939,0,51.26,2644.17
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no 2 stage solution should be here

80,64,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,631.2833,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.9%,0.0,Null,0.0%,631.2833,1,89.3,2304.36
80,128,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,772.5524,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.9%,0.0,Null,0.0%,772.5524,1,145.94,1884.76
80,256,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,1166.708,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.9%,0.0,Null,0.0%,1166.708,1,193.27,1250.38
80,512,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,2209.3824,_ZN5aiter47fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_32x256E,0.8%,0.0,Null,0.0%,2209.3824,1,204.12,662.78
80,1024,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_1x128,1,0,32,0,4205.8762,_ZN5aiter50fmoe_bf16_blockscaleFp8_g1u1_vs_silu_1tg_ps_32x256E,0.9%,0.0,Null,0.0%,4205.8762,1,214.45,350.78
80,16,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,442.3731,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.8%,0.0,Null,0.0%,442.3731,1,31.86,3286.07
80,32,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,520.7061,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,520.7061,1,54.13,2792.39
80,64,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,622.6569,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x256E,6.9%,0.0,Null,0.0%,622.6569,1,90.53,2336.28
80,128,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,687.274,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.2%,0.0,Null,0.0%,687.274,1,164.04,2118.63
80,256,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1021.9423,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,1021.9423,1,220.64,1427.51
80,512,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1749.1923,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,1749.1923,1,257.82,837.15
80,1024,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26
Loading