diff --git a/aiter/configs/tuned_fmoe.csv b/aiter/configs/tuned_fmoe.csv index ed165065d7..d1a5e65a62 100644 --- a/aiter/configs/tuned_fmoe.csv +++ b/aiter/configs/tuned_fmoe.csv @@ -786,3 +786,4 @@ cu_num,token,model_dim,inter_dim,expert,topk,act_type,dtype,q_dtype_a,q_dtype_w, 80,256,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1021.9423,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,1021.9423,1,220.64,1427.51 80,512,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,1749.1923,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.4%,0.0,Null,0.0%,1749.1923,1,257.82,837.15 80,1024,7168,2048,33,10,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fnuz,torch.float8_e4m3fnuz,QuantType.per_Token,1,0,32,0,3226.5114,_ZN5aiter45fmoe_bf16_pertokenFp8_g1u1_vs_silu_1tg_32x512E,5.3%,0.0,Null,0.0%,3226.5114,1,279.54,457.26 +256,16,5120,1024,128,1,ActivationType.Silu,torch.bfloat16,torch.float8_e4m3fn,torch.float8_e4m3fn,QuantType.per_Token,1,1,32,0,0.0,_ZN5aiter46fmoe_bf16_pertokenFp8_g1u1_tkw1_silu_1tg_32x64E,0.0%,0.0,Null,0,0.0,1,0.0,0.0 diff --git a/hsa/gfx950/fmoe/gelu/fmoe_bf16_pertokenFp8_g1u1_gelu_tkw1.csv b/hsa/gfx950/fmoe/gelu/fmoe_bf16_pertokenFp8_g1u1_gelu_tkw1.csv index 5537a862c9..bd6df63fc4 100644 --- a/hsa/gfx950/fmoe/gelu/fmoe_bf16_pertokenFp8_g1u1_gelu_tkw1.csv +++ b/hsa/gfx950/fmoe/gelu/fmoe_bf16_pertokenFp8_g1u1_gelu_tkw1.csv @@ -13,3 +13,4 @@ _ZN5aiter50fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_gelu_1tg_32x256E,fmoe_bf16_pertoke _ZN5aiter50fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_gelu_1tg_32x512E,fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_gelu_1tg_32x512.co,0,1,0,1,0,32,512 _ZN5aiter50fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_gelu_1tg_32x384E,fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_gelu_1tg_32x384.co,0,1,0,1,0,32,384 _ZN5aiter53fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_gelu_1tg_ps_32x448E,fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_gelu_1tg_ps_32x448.co,0,1,0,1,1,32,448 +_ZN5aiter46fmoe_bf16_pertokenFp8_g1u1_tkw1_gelu_1tg_32x64E,fmoe_bf16_pertokenFp8_g1u1_tkw1_gelu_1tg_32x64.co,0,0,0,1,0,32,64 diff --git a/hsa/gfx950/fmoe/gelu/fmoe_bf16_pertokenFp8_g1u1_tkw1_gelu_1tg_32x64.co b/hsa/gfx950/fmoe/gelu/fmoe_bf16_pertokenFp8_g1u1_tkw1_gelu_1tg_32x64.co new file mode 100755 index 0000000000..f28103d1d1 Binary files /dev/null and b/hsa/gfx950/fmoe/gelu/fmoe_bf16_pertokenFp8_g1u1_tkw1_gelu_1tg_32x64.co differ diff --git a/hsa/gfx950/fmoe/silu/fmoe_bf16_pertokenFp8_g1u1_silu_tkw1.csv b/hsa/gfx950/fmoe/silu/fmoe_bf16_pertokenFp8_g1u1_silu_tkw1.csv index cf40973078..2b1fa20f0b 100644 --- a/hsa/gfx950/fmoe/silu/fmoe_bf16_pertokenFp8_g1u1_silu_tkw1.csv +++ b/hsa/gfx950/fmoe/silu/fmoe_bf16_pertokenFp8_g1u1_silu_tkw1.csv @@ -13,3 +13,4 @@ _ZN5aiter50fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_silu_1tg_32x448E,fmoe_bf16_pertoke _ZN5aiter50fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_silu_1tg_32x128E,fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_silu_1tg_32x128.co,0,1,0,1,0,32,128 _ZN5aiter50fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_silu_1tg_32x192E,fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_silu_1tg_32x192.co,0,1,0,1,0,32,192 _ZN5aiter53fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_silu_1tg_ps_32x512E,fmoe_bf16_pertokenFp8_g1u1_vs_tkw1_silu_1tg_ps_32x512.co,0,1,0,1,1,32,512 +_ZN5aiter46fmoe_bf16_pertokenFp8_g1u1_tkw1_silu_1tg_32x64E,fmoe_bf16_pertokenFp8_g1u1_tkw1_silu_1tg_32x64.co,0,0,0,1,0,32,64 diff --git a/hsa/gfx950/fmoe/silu/fmoe_bf16_pertokenFp8_g1u1_tkw1_silu_1tg_32x64.co b/hsa/gfx950/fmoe/silu/fmoe_bf16_pertokenFp8_g1u1_tkw1_silu_1tg_32x64.co new file mode 100755 index 0000000000..c27dcff923 Binary files /dev/null and b/hsa/gfx950/fmoe/silu/fmoe_bf16_pertokenFp8_g1u1_tkw1_silu_1tg_32x64.co differ