@@ -475,28 +475,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
475475; GFX9-O0-NEXT: ; implicit-def: $sgpr8
476476; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
477477; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
478+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
479+ ; GFX9-O0-NEXT: s_nop 0
480+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
478481; GFX9-O0-NEXT: ; implicit-def: $sgpr8
479482; GFX9-O0-NEXT: ; implicit-def: $sgpr8
480483; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
481484; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
482- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
483- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
484- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
485- ; GFX9-O0-NEXT: s_nop 0
486- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
487- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
488- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
489- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
485+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
490486; GFX9-O0-NEXT: s_nop 0
491- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
493- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
487+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
488+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
494489; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
495- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
496- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
490+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
497491; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
498- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
499- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
492+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
500493; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
501494; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
502495; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -507,7 +500,6 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
507500; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
508501; GFX9-O0-NEXT: s_mov_b32 s14, s13
509502; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
510- ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
511503; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
512504; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
513505; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1046,10 +1038,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
10461038; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
10471039; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
10481040; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1049- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1050- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1051- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1052- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1041+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1042+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1043+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1044+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
10531045; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
10541046; GFX9-O0-NEXT: s_mov_b32 s5, s6
10551047; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2667,28 +2659,21 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26672659; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26682660; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
26692661; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2662+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2663+ ; GFX9-O0-NEXT: s_nop 0
2664+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
26702665; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26712666; GFX9-O0-NEXT: ; implicit-def: $sgpr8
26722667; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
26732668; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2674- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2675- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2676- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2677- ; GFX9-O0-NEXT: s_nop 0
2678- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2679- ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2680- ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2681- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2669+ ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
26822670; GFX9-O0-NEXT: s_nop 0
2683- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2684- ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2685- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
2671+ ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2672+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
26862673; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2687- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2688- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2674+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
26892675; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2690- ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2691- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
2676+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
26922677; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
26932678; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
26942679; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -2699,7 +2684,6 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
26992684; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
27002685; GFX9-O0-NEXT: s_mov_b32 s14, s13
27012686; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2702- ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
27032687; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
27042688; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
27052689; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3238,10 +3222,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
32383222; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
32393223; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
32403224; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3241- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3242- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3243- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3244- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3225+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3226+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3227+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3228+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
32453229; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
32463230; GFX9-O0-NEXT: s_mov_b32 s5, s6
32473231; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments