diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtna.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtna.co index af10ab6df5..49492530a7 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtna.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtna.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtna_group.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtna_group.co index ac1ca972ac..3497784a8b 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtna_group.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtna_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtne.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtne.co index e4a46bd725..b12af28c73 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtne.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtne.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtne_group.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtne_group.co index 4b8000efeb..5c483e167d 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtne_group.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtne_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtz.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtz.co index ab519bce8e..9c98469988 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtz.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtz.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtz_group.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtz_group.co index fbd5eee308..e266155828 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtz_group.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_causal_rtz_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtna.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtna.co index 092e402ad1..f2b56d9aa2 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtna.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtna.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtna_group.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtna_group.co index 48ac9e54a5..9eaf54e9db 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtna_group.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtna_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtne.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtne.co index a63a8c2940..ab464e4f4c 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtne.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtne.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtne_group.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtne_group.co index 27c55d1937..65aefb9403 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtne_group.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtne_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtz.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtz.co index 6edbc54bbd..91cb20f2a6 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtz.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtz.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtz_group.co b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtz_group.co index b6d3e01639..d0d53352d3 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtz_group.co and b/hsa/gfx942/fmha_v3_fwd/MI300/fwd_hd128_bf16_rtz_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtna.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtna.co index 69ab645de8..840e6c8dad 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtna.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtna.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtna_group.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtna_group.co index 9b97c147e9..51733bce3c 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtna_group.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtna_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtne.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtne.co index 7f5c12bc05..6b8e5dda91 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtne.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtne.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtne_group.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtne_group.co index 4032e8d161..9bf17fcd5f 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtne_group.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtne_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtz.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtz.co index 302a658023..33b882352e 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtz.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtz.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtz_group.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtz_group.co index 6c2cb554b4..57a453e43c 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtz_group.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_causal_rtz_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtna.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtna.co index fd75121550..1212d40271 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtna.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtna.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtna_group.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtna_group.co index 0b4bf90955..95c9a4a1f3 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtna_group.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtna_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtne.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtne.co index 3f0351962b..2fc063ea94 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtne.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtne.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtne_group.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtne_group.co index 52f27a9f9d..1aa7bbc078 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtne_group.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtne_group.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtz.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtz.co index a484b398bf..9148e582f6 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtz.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtz.co differ diff --git a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtz_group.co b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtz_group.co index 00d8f51b7a..d7ee230d66 100755 Binary files a/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtz_group.co and b/hsa/gfx942/fmha_v3_fwd/MI308/fwd_hd128_bf16_rtz_group.co differ diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16.co index 460d4d2ec2..6e9af06220 100755 Binary files a/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16.co and b/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16.co differ diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_causal.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_causal.co index 2d8bf0bb7f..dae2c6e62f 100755 Binary files a/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_causal.co and b/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_causal.co differ diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_causal_group.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_causal_group.co index 35a76e1b82..81a4dc650e 100755 Binary files a/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_causal_group.co and b/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_causal_group.co differ diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_group.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_group.co index 21758854b5..ded08c3eed 100755 Binary files a/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_group.co and b/hsa/gfx950/fmha_v3_fwd/fwd_hd128_bf16_group.co differ diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16.co index 9890d4dfa6..482d44bd21 100755 Binary files a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16.co and b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16.co differ diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal.co index e3a45201f4..5f994f73b1 100755 Binary files a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal.co and b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal.co differ diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal_group.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal_group.co index ec05c4ed25..b5ea7beff0 100755 Binary files a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal_group.co and b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_causal_group.co differ diff --git a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_group.co b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_group.co index b2c6afde13..794fd00158 100755 Binary files a/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_group.co and b/hsa/gfx950/fmha_v3_fwd/fwd_hd192_hd128_bf16_group.co differ diff --git a/op_tests/cpp/mha/smoke_test_fwd_v3.sh b/op_tests/cpp/mha/smoke_test_fwd_v3.sh index 23a980d35e..3e0432b42f 100644 --- a/op_tests/cpp/mha/smoke_test_fwd_v3.sh +++ b/op_tests/cpp/mha/smoke_test_fwd_v3.sh @@ -24,7 +24,7 @@ run_gfx950_fwd_v3() { for mask in 0 2 ; do for lse in 0 1 ; do for seqlen_q in 127 192 301 512 1024; do - for seqlen_k in 512 700 1023 1058; do + for seqlen_k in 0 129 512 700 1023 1058; do $EXE -prec=bf16 -b=2 -h=4 -h_k=2 -d=$head_dim -d_v=128 -s=$seqlen_q -s_k=$seqlen_k -iperm=$i_perm -operm=$o_perm -mask=$mask -lse=$lse -fwd_v3=1 -mode=$mode -kname=$KNAME $COMMON_ARGS $EXE -prec=bf16 -b=1 -h=3 -h_k=1 -d=$head_dim -d_v=128 -s=$seqlen_q -s_k=$seqlen_k -iperm=$i_perm -operm=$o_perm -mask=$mask -lse=$lse -fwd_v3=1 -mode=$mode -kname=$KNAME $COMMON_ARGS @@ -36,6 +36,10 @@ run_gfx950_fwd_v3() { $EXE -prec=bf16 -b=1 -h=1 -h_k=1 -d=$head_dim -d_v=128 -s=$seqlen_q -s_k=$seqlen_q -iperm=$i_perm -operm=$o_perm -mask=1 -lse=$lse -fwd_v3=1 -mode=$mode -kname=$KNAME $COMMON_ARGS fi + if [[ "$mode" = "1" ]]; then + $EXE -prec=bf16 -b=2 -h=4 -h_k=2 -d=128 -s=$seqlen_q,$seqlen_k -s_k=$seqlen_k,0 -iperm=$i_perm -operm=$o_perm -mask=$mask -lse=$lse -fwd_v3=1 -v3_bf16_cvt=$v3_bf16_cvt -mode=$mode -kname=$KNAME $COMMON_ARGS + fi + done done done @@ -54,7 +58,7 @@ run_gfx942_fwd_v3() { for mask in 0 2 ; do for lse in 0 1 ; do for seqlen_q in 127 192 301 512 1024; do - for seqlen_k in 512 700 1023 1058; do + for seqlen_k in 0 129 512 700 1023 1058; do for v3_bf16_cvt in 0 1 2; do $EXE -prec=bf16 -b=2 -h=4 -h_k=2 -d=128 -s=$seqlen_q -s_k=$seqlen_k -iperm=$i_perm -operm=$o_perm -mask=$mask -lse=$lse -fwd_v3=1 -v3_bf16_cvt=$v3_bf16_cvt -mode=$mode -kname=$KNAME $COMMON_ARGS @@ -67,6 +71,10 @@ run_gfx942_fwd_v3() { $EXE -prec=bf16 -b=1 -h=1 -h_k=1 -d=128 -s=$seqlen_q -s_k=$seqlen_q -iperm=$i_perm -operm=$o_perm -mask=1 -lse=$lse -fwd_v3=1 -v3_bf16_cvt=$v3_bf16_cvt -mode=$mode -kname=$KNAME $COMMON_ARGS fi + if [[ "$mode" = "1" ]]; then + $EXE -prec=bf16 -b=2 -h=4 -h_k=2 -d=128 -s=$seqlen_q,$seqlen_k -s_k=$seqlen_k,0 -iperm=$i_perm -operm=$o_perm -mask=$mask -lse=$lse -fwd_v3=1 -v3_bf16_cvt=$v3_bf16_cvt -mode=$mode -kname=$KNAME $COMMON_ARGS + fi + done done done