@@ -475,28 +475,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
475475; GFX9-O0-NEXT: ; implicit-def: $sgpr8
476476; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477477; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
478+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479+ ; GFX9-O0-NEXT: s_nop 0
480+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
478481; GFX9-O0-NEXT: ; implicit-def: $sgpr8
479482; GFX9-O0-NEXT: ; implicit-def: $sgpr8
480483; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
481484; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
482- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
483- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
484- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
485- ; GFX9-O0-NEXT: s_nop 0
486- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
487- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
488- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
489- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
485+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
490486; GFX9-O0-NEXT: s_nop 0
491- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
493- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
487+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
494489; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
495- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
496- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
490+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
497491; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
498- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
499- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
492+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
500493; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
501494; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
502495; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -507,7 +500,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
507500; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
508501; GFX9-O0-NEXT: s_mov_b32 s14, s13
509502; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
510- ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
511503; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
512504; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
513505; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1043,10 +1035,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10431035; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10441036; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
10451037; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1046- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1047- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1048- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1049- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1038+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1039+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1040+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1041+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10501042; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
10511043; GFX9-O0-NEXT: s_mov_b32 s5, s6
10521044; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2664,28 +2656,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26642656; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26652657; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
26662658; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2659+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2660+ ; GFX9-O0-NEXT: s_nop 0
2661+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
26672662; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26682663; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26692664; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
26702665; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2671- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2672- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2673- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2674- ; GFX9-O0-NEXT: s_nop 0
2675- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2676- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2677- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2678- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2666+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
26792667; GFX9-O0-NEXT: s_nop 0
2680- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2681- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2682- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
2668+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2669+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
26832670; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2684- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2685- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2671+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
26862672; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2687- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2688- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
2673+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
26892674; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
26902675; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
26912676; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -2696,7 +2681,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26962681; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
26972682; GFX9-O0-NEXT: s_mov_b32 s14, s13
26982683; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2699- ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
27002684; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
27012685; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
27022686; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3232,10 +3216,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
32323216; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
32333217; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
32343218; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3235- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3236- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3237- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3238- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3219+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3220+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3221+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3222+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
32393223; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
32403224; GFX9-O0-NEXT: s_mov_b32 s5, s6
32413225; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments